########################################################################################################################
##
## Human Development Report Office (HDRO), United Nations Development Programme
## Multidimensional Poverty Index (MPI) 2020 release
##
## This code calculates the MPI and its components using the 2017/2018 DHS data for Benin.
## Users should first download the DHS data available at https://dhsprogram.com/data/available-datasets.cfm
## Users should also download the WHO macro packages that calculate anthropometric z-scores for children under 5 years (https://www.who.int/childgrowth/software/en/) 
## and for older boys and girls 15-19 years (https://www.who.int/growthref/tools/en/). Please follow WHO instructions.
## The WHO macro for R produces z scores that are slightly different in comparison with the z scores calculated in Stata. This is manually fixed for the affected observations at the end of Step 1.1 below.
## Once WHO updates the R macro, the manual correction will no longer be necessary.
## 
## For now, MPI programs in R are available for 4 selected countries (Benin, Republic of Congo, India and Iraq). 
## This is still an experimental phase and HDRO plans to expand the availability of such programs. 
## However, users can adapt any of the MPI codes in R and produce programs for other countries. The modifications will depend on the information collected in the data for the other countries.
## We welcome feedback from the users. 
########################################################################################################################
  
### Set-up ### 
rm(list = ls())             # Clean up the environment (removes every object in the workspace)
options(scipen = 6)         # Display digits, not the scientific version
par(mfrow = c(1, 1))        # Reset plot placement to normal 1 by 1
options(warn = -1)          # Suppress all warnings for the rest of the session
                            # NOTE(review): this also hides genuine data problems
                            # (e.g. recycling warnings); consider removing it.

### Working Folder Path ###
# All four locations currently point at the same folder, so the literal is
# written once and shared. Change an individual path_* below if they diverge.
path_base <- "C:/Users/cecilia.calderon/Documents/HDRO_MCC/MPI/MPI 2.0/Kathrin consultancy R/Benin/"
path_in   <- path_base
path_out  <- path_base
path_logs <- path_base
path_pc   <- path_base


### Log file ### 
#sink(file.path(path_logs,"ben_dhs18_dataprep.txt"), split = TRUE)

### WHO2007 R macro package ###
# https://www.who.int/growthref/tools/readme_r.pdf?ua=1
# Reference tables and the macro itself are expected in path_in.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
wfawho2007 <- read.table(file.path(path_in, "wfawho2007.txt"), header = TRUE, sep = "", skip = 0)
hfawho2007 <- read.table(file.path(path_in, "hfawho2007.txt"), header = TRUE, sep = "", skip = 0)
bfawho2007 <- read.table(file.path(path_in, "bfawho2007.txt"), header = TRUE, sep = "", skip = 0)
source(file.path(path_in, "who2007.r"))

### Packages ###
# install.packages(c("haven", "Hmisc", "plyr", "memisc", "expss", "questionr", "anthro", "survey"))
library(haven)      # ready in dta file
library(Hmisc)      # to label variables
library(plyr)       # used for desc
library(memisc)     # command as codebook
library(expss)      # table with label
library(questionr)  # lookfor command
library(anthro)     # https://www.who.int/childgrowth/software/en/
library(survey)     # takes survey design into account    

########################################################################################################################
### Benin DHS 2017-18                                                                                                ###
########################################################################################################################
  
  
########################################################################################################################
### Step 1: Data preparation 
### Selecting variables from KR, BR, IR, & MR recode & merging with PR recode 
########################################################################################################################
# In Benin DHS 2017-18, height and weight measurements were collected from children (0-5) in 100% of sample and for women 
# (15-49) in 50% of the sample. We use 100% of sample for MPI.


########################################################################################################################
### Step 1.1 KR - CHILDREN's RECODE (under 5); the child anthropometric variables are read from the PR file (BJPR71FL)
########################################################################################################################
# Load the household-member (PR) file; the child variables (hc*) used below are read from it.
DataKR <- read_stata(file.path(path_in, "BJPR71FL.DTA"))
### Generate individual unique key variable required for data merging
### hv001=cluster number; 
### hv002=household number; 
### hvidx=household line number in household

DataKR$ind_id <- DataKR$hv001 * 1000000 + DataKR$hv002 * 100 + DataKR$hvidx
label(DataKR$ind_id) <- "Individual ID"
str(DataKR$ind_id)

anyDuplicated(DataKR$ind_id)    # 0 means the key is unique

# Keep only the rows flagged hv120 == 1 (children eligible for anthropometrics)
DataKR <- subset(DataKR, hv120 == 1)

DataKR$child_KR <- 1
    # Generate identification variable for observations in KR recode

### Next check the variables that WHO needs to calculate the z-scores:
### sex, age, weight, height

### Variable: SEX ###
table(DataKR$hv104, useNA = "always")
    ### "1" for male ;"2" for female
DataKR$gender <- DataKR$hv104
str(DataKR$gender)
table(DataKR$gender, useNA = "always")


### Variable: AGE ###
table(DataKR$hc1, useNA = "always")
codebook(DataKR$hc1)
    # Age is measured in months
DataKR$age_months <- DataKR$hc1
describe(DataKR$age_months)
summary(DataKR$age_months)
DataKR$ageunit <- "months"
label(DataKR$ageunit) <- "Months"

# Measurement date and birth date, both assembled as month/day/year
DataKR$mdate <- as.Date(paste(DataKR$hc18, DataKR$hc17, DataKR$hc19, sep = "/"), format = "%m/%d/%Y")
DataKR$bdate <- as.Date(paste(DataKR$hc30, DataKR$hc16, DataKR$hc31, sep = "/"), format = "%m/%d/%Y")

# Day-of-birth codes above 31 are not real days: impute the 15th of the month.
# The replacement is built from the SAME rows that are being replaced; using the
# full-length vector on the right-hand side would recycle the first rows' dates
# into these positions and give the affected children someone else's birth date.
day_unknown <- which(DataKR$hc16 > 31)
if (length(day_unknown) > 0) {
  DataKR$bdate[day_unknown] <- as.Date(
    paste(DataKR$hc30[day_unknown], 15, DataKR$hc31[day_unknown], sep = "/"),
    format = "%m/%d/%Y"
  )
}
rm(day_unknown)

      # Calculate birth date in days from date of interview
DataKR$age <- (as.numeric(DataKR$mdate) - as.numeric(DataKR$bdate)) / 30.4375
    # Calculate age in months 

# Diagnostic only: compare the computed age with hc1a converted to months
# (0 = identical, 1 = different). Exact floating-point equality is used, so tiny
# rounding differences also count as "different".
DataKR$age2 <- DataKR$hc1a / 30.4375
DataKR$comapre <- ifelse(DataKR$age == DataKR$age2, 0, 1)
table(DataKR$comapre, useNA = "always")
DataKR$age2 <- NULL


### Variable: BODY WEIGHT (KILOGRAMS) ###
describe(DataKR$hc2)
table(DataKR$hc2, useNA = "always")
    # hc2 is stored in tenths of a kilogram; codes of 9990 and above are not
    # measurements (missing values are 9994 to 9996) and become NA
DataKR$weight <- replace(DataKR$hc2 / 10, which(DataKR$hc2 >= 9990), NA)
table(DataKR$hc2[DataKR$hc2>9990], useNA = "always")
table(DataKR$hc13[DataKR$hc2>=9990], DataKR$hc2[DataKR$hc2>=9990], useNA = "always")
    # hc13: result of the measurement
describe(DataKR$weight)
summary(DataKR$weight)


### Variable: HEIGHT (CENTIMETERS) ###
describe(DataKR$hc3)
table(DataKR$hc3, useNA = "always")
    # hc3 is stored in tenths of a centimeter; same treatment of the
    # special codes (9994 to 9996) as for weight
DataKR$height <- replace(DataKR$hc3 / 10, which(DataKR$hc3 >= 9990), NA)
table(DataKR$hc3[DataKR$hc3>9990], useNA = "always")
table(DataKR$hc13[DataKR$hc3>=9990], DataKR$hc3[DataKR$hc3>=9990], useNA = "always")
describe(DataKR$height)
summary(DataKR$height)


### Variable: MEASURED STANDING/LYING DOWN ###
describe(DataKR$hc15)
    # Start from "unknown" (NA) for everybody, then fill in the two known positions:
    #   hc15 == 1 -> "l" (child measured lying down)
    #   hc15 == 2 -> "h" (child measured standing up)
    # Codes 0 and 9, and anything else, stay NA.
DataKR$measure <- rep(NA_character_, nrow(DataKR))
DataKR$measure[which(DataKR$hc15 == 1)] <- "l"
DataKR$measure[which(DataKR$hc15 == 2)] <- "h"
describe(DataKR$measure)
table(DataKR$measure, useNA = "always")


### Variable: OEDEMA ###
lookfor(DataKR, "oedema")
    # No oedema information is used: everybody is coded "n" (no oedema)
DataKR$oedema <- "n"
describe(DataKR$oedema)
table(DataKR$oedema, useNA = "always")


### Variable: INDIVIDUAL CHILD SAMPLING WEIGHT ### 
    # DHS sample weights are stored multiplied by 1000000
DataKR$sw <- DataKR$hv005 / 1000000
describe(DataKR$sw)
summary(DataKR$sw)


# We now run the command to calculate the z-scores with the R-Command #
# z-scores from the `anthro` package; age is the computed age in months.
# NOTE(review): the `measure` column built above (lying/standing) is not passed
# to anthro_zscores() here - confirm whether that is intended.
children_nutri_ben_z_rc <- anthro_zscores(
  sex = DataKR$gender,
  age = DataKR$age,
  is_age_in_month = TRUE,
  weight = DataKR$weight,
  lenhei = DataKR$height,
  oedema = DataKR$oedema
)


### Standard MPI indicator ### 
    # Each indicator is 1 if the z-score is under 2 stdev below the median, 0 otherwise,
    # and NA when the z-score is missing or flagged (f* == 1) by the WHO routine.

# Underweight: weight-for-age (zwei / fwei)
low_wfa <- ifelse(children_nutri_ben_z_rc$zwei < -2.0, 1, 0)
low_wfa[is.na(children_nutri_ben_z_rc$zwei) | children_nutri_ben_z_rc$fwei == 1] <- NA
children_nutri_ben_z_rc$underweight <- low_wfa
label(children_nutri_ben_z_rc$underweight) <- "Child is undernourished (weight-for-age) 2sd - WHO"
table(children_nutri_ben_z_rc$underweight, useNA = "always")

# Stunting: length/height-for-age (zlen / flen)
low_hfa <- ifelse(children_nutri_ben_z_rc$zlen < -2.0, 1, 0)
low_hfa[is.na(children_nutri_ben_z_rc$zlen) | children_nutri_ben_z_rc$flen == 1] <- NA
children_nutri_ben_z_rc$stunting <- low_hfa
label(children_nutri_ben_z_rc$stunting) <- "Child is stunted (length/height-for-age) 2sd - WHO"
table(children_nutri_ben_z_rc$stunting, useNA = "always")

# Wasting: weight-for-length/height (zwfl / fwfl)
low_wfh <- ifelse(children_nutri_ben_z_rc$zwfl < -2.0, 1, 0)
low_wfh[is.na(children_nutri_ben_z_rc$zwfl) | children_nutri_ben_z_rc$fwfl == 1] <- NA
children_nutri_ben_z_rc$wasting <- low_wfh
label(children_nutri_ben_z_rc$wasting) <- "Child is wasted (weight-for-length/height) 2sd - WHO"
table(children_nutri_ben_z_rc$wasting, useNA = "always")

rm(low_wfa, low_hfa, low_wfh)


# Retain relevant variables:
# Put the z-score results next to the child records (same row order) and keep
# only the three indicators, the merge key and the KR identification flag.
ben18_KR <- cbind(children_nutri_ben_z_rc, DataKR)
ben18_KR <- ben18_KR[c("underweight", "stunting", "wasting", "ind_id", "child_KR")]
rm("children_nutri_ben_z_rc")

# comparing the results of R and Stata shows that stunting has slightly different results in R for the observations
# for stunting
# ind_id == 7002503
# ind_id == 179009905
# ind_id == 227006203
#
# for wasting
# ind_id == 113007405
# XXX correct when solved problem 

# Manual alignment with the Stata results - remove after fixing
ben18_KR$stunting[ben18_KR$ind_id == 7002503] <- 0
ben18_KR$stunting[ben18_KR$ind_id == 179009905] <- 0
ben18_KR$stunting[ben18_KR$ind_id == 227006203] <- 0
ben18_KR$wasting[ben18_KR$ind_id == 113007405] <- 1

# Sort by the merge key. The result must be assigned back: a bare
# `ben18_KR[order(...), ]` only prints a sorted copy and leaves the data unsorted.
ben18_KR <- ben18_KR[order(ben18_KR$ind_id), ]
anyDuplicated(ben18_KR$ind_id)    # 0 means the key is unique


########################################################################################################################
### Step 1.2  BR - BIRTH RECODE
### (All females 15-49 years who ever gave birth) 
########################################################################################################################
# Birth recode: one row per birth; collapsed below to one row per woman.
DataBR <- read_stata(file.path(path_in, "BJBR71FL.DTA"))

### Generate individual unique key variable required for data merging
### v001=cluster number; 
### v002=household number; 
### v003=respondent's line number

DataBR$ind_id <- DataBR$v001 * 1000000 + DataBR$v002 * 100 + DataBR$v003
label(DataBR$ind_id) <- "Individual ID"
str(DataBR$ind_id)

describe(DataBR$b3)
describe(DataBR$b7)
DataBR$date_death <- DataBR$b3 + DataBR$b7
    # Date of death = date of birth (b3) + age at death (b7)
DataBR$mdead_survey <- DataBR$v008 - DataBR$date_death
    # Months dead from survey = Date of interview (v008) - date of death
DataBR$ydead_survey <- DataBR$mdead_survey / 12
    # Years dead from survey

describe(DataBR$b5)
table(DataBR$b5, useNA = "always")
DataBR$child_died[DataBR$b5 == 0] <- 1
    # Redefine the coding and labels (1=child dead; 0=child alive)
DataBR$child_died[DataBR$b5 == 1] <- 0
DataBR$child_died[is.na(DataBR$b5)] <- NA
table(DataBR$b5, DataBR$child_died, useNA = "always")


# NOTE: For each woman, sum the number of children who died and compare to the number of sons/daughters 
# whom they reported have died 
DataBR$tot_child_died <- ave(DataBR$child_died, DataBR$ind_id, FUN = function(x) sum(x, na.rm = TRUE))
DataBR$tot_child_died_2 <- DataBR$v206 + DataBR$v207
    # v206: sons who have died
    # v207: daughters who have died
DataBR$comapre <- ifelse(DataBR$tot_child_died == DataBR$tot_child_died_2, 0, 1)
table(DataBR$comapre, useNA = "always")
    # In Benin DHS 2017-18, these figures are identical
DataBR$child_died[DataBR$b7 >= 216] <- 0
    # counting only deaths of children <18y (216 months)
    # (this recode comes after tot_child_died, so it only affects the 5-year count below)

# temp = 1 for a death within 5 years of the survey, 0 for an earlier death or a living child
DataBR$temp[DataBR$ydead_survey <= 5 & DataBR$child_died == 1] <- 1
DataBR$temp[DataBR$ydead_survey > 5 & DataBR$child_died == 1] <- 0
DataBR$temp[DataBR$child_died == 0] <- 0
DataBR$tot_child_died_5y <- ave(DataBR$temp, DataBR$ind_id, FUN = function(x) sum(x, na.rm = TRUE))

DataBR$child_died_per_wom <- ave(DataBR$tot_child_died, DataBR$ind_id, FUN = function(x) max(x, na.rm = TRUE))
label(DataBR$child_died_per_wom) <- "Total child death for each women (birth recode)"

DataBR$child_died_per_wom_5y <- ave(DataBR$tot_child_died_5y, DataBR$ind_id, FUN = function(x) max(x, na.rm = TRUE))
label(DataBR$child_died_per_wom_5y) <- "Total child death for each women in the last 5 years (birth recode)"


#Keep one observation per women
# The sort has to be assigned back to take effect. order() leaves ties in their
# original order, so the first birth record of each woman stays the same one.
DataBR <- DataBR[order(DataBR$ind_id), ]
DataBR <- DataBR[!duplicated(DataBR$ind_id), ]

DataBR$women_BR <- 1
    # Identification variable for observations in BR recode


#Retain relevant variables
# b16 and b7 are taken from the single row kept per woman.
ben18_BR <- DataBR[c("ind_id", "women_BR", "b16", "child_died_per_wom",
                     "child_died_per_wom_5y", "b7")]

	
########################################################################################################################
### Step 1.3  IR - WOMEN's RECODE  
### (All eligible females 15-49 years in the household)
######################################################################################################################## 
# Women's (IR) recode: one row per interviewed woman.
DataIR <- read_stata(file.path(path_in, "BJIR71FL.DTA"))

### Generate individual unique key variable required for data merging
### v001=cluster number; 
### v002=household number; 
### v003=respondent's line number

DataIR$ind_id <- DataIR$v001 * 1000000 + DataIR$v002 * 100 + DataIR$v003
label(DataIR$ind_id) <- "Individual ID"
str(DataIR$ind_id)

anyDuplicated(DataIR$ind_id)    # 0 means the key is unique

DataIR$women_IR <- 1
    # Identification variable for observations in IR recode

# Sort by the merge key; the result must be assigned back, otherwise the sorted
# copy is only printed and DataIR stays in its original order.
DataIR <- DataIR[order(DataIR$ind_id), ]
ben18_IR <- DataIR[c("ind_id", "women_IR", "v003", "v005", "v012", "v201", "v206", "v207")]
    # Save a temp file for merging with PR


########################################################################################################################
### Step 1.4  IR - WOMEN'S RECODE (the girls' records below are read from the PR file, BJPR71FL)
### (Girls 15-19 years in the household)
########################################################################################################################
# Household-member (PR) file, restricted below to girls 15-19 years in
# households with hv042 == 1.
DataPR <- read_stata(file.path(path_in, "BJPR71FL.DTA"))

### Generate individual unique key variable required for data merging
### hv001=cluster number; 
### hv002=household number; 
### hvidx=householdline number

DataPR$ind_id <- DataPR$hv001 * 1000000 + DataPR$hv002 * 100 + DataPR$hvidx
label(DataPR$ind_id) <- "Individual ID"
str(DataPR$ind_id)

anyDuplicated(DataPR$ind_id)    # 0 means the key is unique

DataPR <- subset(DataPR, hv104 == 2 & hv105 >= 15 & hv105 <= 19 & hv042 == 1)


### Variables required to calculate the z-scores to produce BMI-for-age:

### Variable: SEX ###
DataPR$gender <- 2
    # Every remaining row is female (hv104 == 2 in the filter above)


### Variable: AGE IN MONTHS ###
DataPR$comapre <- ifelse(DataPR$hv807c == DataPR$hv008, 0, 1)
table(DataPR$comapre, useNA = "always")
    #date of biomarker vs date of interview, they should be identical

DataPR$age_month <- DataPR$hv807c - DataPR$ha32
label(DataPR$age_month) <- "Age in months, individuals 15-19 years"


###  Variable: AGE UNIT ### 
DataPR$ageunit <- "months"
label(DataPR$ageunit) <- "Months"


### Variable: BODY WEIGHT (KILOGRAMS) ###
describe(DataPR$ha2)
table(DataPR$ha2, useNA = "always")
DataPR$weight <- DataPR$ha2 / 10
    # We divide it by 10 in order to express it in kilograms
DataPR$weight[DataPR$ha2 >= 9990] <- NA
    # All missing values or out of range are replaced as NA
summary(DataPR$weight)


### Variable: HEIGHT (CENTIMETERS) ###
describe(DataPR$ha3)
table(DataPR$ha3, useNA = "always")
DataPR$height <- DataPR$ha3 / 10
    # We divide it by 10 in order to express it in centimeters
DataPR$height[DataPR$ha3 >= 9990] <- NA
    # All missing values or out of range are replaced as NA.
    # The cutoff is >= 9990, the same one used for weight above and for the
    # children's weight and height in Step 1.1.
summary(DataPR$height)


### Variable: OEDEMA ***
lookfor(DataPR, "oedema")
DataPR$oedema <- "n"
    # It assumes no-one has oedema
describe(DataPR$oedema)
table(DataPR$oedema, useNA = "always")


### Variable: SAMPLING WEIGHT ### 
DataPR$sw <- DataPR$hv005 / 1000000
    # For DHS sample weight has to be divided 1000000
summary(DataPR$sw)

# Convert to a plain data.frame before handing the data to the WHO 2007 macro
DataPR <- as.data.frame(DataPR)

# We now run the command to calculate the z-scores with the R-Command #
# The WHO 2007 macro writes its results to disk and they are read back below.
# The output folder must therefore be the folder the CSV is read from; a
# hard-coded folder on another machine makes the read fail or pick up a stale file.
# Trailing path separators are stripped because the macro appends its own.
# NOTE(review): assumes who2007() writes "<FilePath>/<FileLab>_z.csv", as implied
# by the file name read below - confirm against the WHO macro readme.
who_dir <- sub("[/\\\\]+$", "", path_in)
who2007(FilePath = who_dir,
        FileLab = "girl_nutri_ben_z",
        mydf = DataPR,
        sex = gender,
        age = age_month,
        weight = weight,
        height = height,
        oedema = oedema,
        sw = sw)

girl_nutri_ben_z <- read.csv(file.path(who_dir, "girl_nutri_ben_z_z.csv"))
rm(who_dir)

### Standard MPI Indicator ###
# BMI-for-age z-score, set to NA where the macro flagged the value (fbfa == 1)
girl_nutri_ben_z$z_bmi <- girl_nutri_ben_z$zbfa
girl_nutri_ben_z$z_bmi[girl_nutri_ben_z$fbfa == 1] <- NA
label(girl_nutri_ben_z$z_bmi) <- "z-score bmi-for-age WHO"

girl_nutri_ben_z$low_bmiage <- ifelse(girl_nutri_ben_z$z_bmi < -2.0, 1, 0)
    # Takes value 1 if BMI-for-age is under 2 stdev below the median & 0 otherwise;
    # ifelse() already returns NA where z_bmi is NA
label(girl_nutri_ben_z$low_bmiage) <- "Teenage low bmi 2sd - WHO"

girl_nutri_ben_z$teen_IR <- 1
    # Identification variable for observations in IR recode (only 15-19 years)


#Retain relevant variables:	
ben18_IR_girls <- girl_nutri_ben_z[c("ind_id", "teen_IR", "age_month", "low_bmiage")]
rm("girl_nutri_ben_z")
# Sort by the merge key; assigned back so that the sort actually takes effect
ben18_IR_girls <- ben18_IR_girls[order(ben18_IR_girls$ind_id), ]


 
########################################################################################################################
### Step 1.5  MR - MEN'S RECODE  
### (All eligible man: 15-64 years in the household) 
########################################################################################################################  
# Men's (MR) recode: one row per interviewed man.
DataMR <- read_stata(file.path(path_in, "BJMR71FL.DTA"))

### Generate individual unique key variable required for data merging
### mv001=cluster number; 
### mv002=household number; 
### mv003=respondent's line number

DataMR$ind_id <- DataMR$mv001 * 1000000 + DataMR$mv002 * 100 + DataMR$mv003
label(DataMR$ind_id) <- "Individual ID"
str(DataMR$ind_id)

anyDuplicated(DataMR$ind_id)    # 0 means the key is unique

DataMR$men_MR <- 1
    # Identification variable for observations in MR recode

# Sort by the merge key; the result must be assigned back, otherwise the sorted
# copy is only printed and DataMR stays in its original order.
DataMR <- DataMR[order(DataMR$ind_id), ]
ben18_MR <- DataMR[c("ind_id", "men_MR", "mv003", "mv005", "mv012", "mv201", "mv206", "mv207")]
    # Save a temp file for merging with PR:


########################################################################################################################  
### Step 1.6a  MR - MEN'S RECODE  
### (Boys 15-19 years in the household) 
########################################################################################################################  
# Note: In the case of Benin 2017-18, anthropometric data was not collected for men.
# The men's file is loaded again to build the (empty) nutrition placeholders for boys.
DataMR <- read_stata(file.path(path_in, "BJMR71FL.DTA"))

### Generate individual unique key variable required for data merging
### mv001=cluster number; 
### mv002=household number; 
### mv003=respondent's line number

DataMR$ind_id <- DataMR$mv001 * 1000000 + DataMR$mv002 * 100 + DataMR$mv003
label(DataMR$ind_id) <- "Individual ID"
str(DataMR$ind_id)

anyDuplicated(DataMR$ind_id)    # 0 means the key is unique

# No anthropometric data for men in this survey: both variables are all NA
DataMR$age_month_boys <- NA

DataMR$low_bmiage_boys <- NA
label(DataMR$low_bmiage_boys) <- "Teenage low bmi 2sd - WHO"

DataMR <- subset(DataMR, mv012 >= 15 & mv012 <= 19)
    # Keep only boys between age 15-19 years to compute BMI-for-age

DataMR$teen_MR <- 1
    # Identification variable for boys 15-19 years in the MR recode


#Retain relevant variables:	
# Sort by the merge key; assigned back so that the sort actually takes effect
DataMR <- DataMR[order(DataMR$ind_id), ]
ben18_MR_boys <- DataMR[c("ind_id", "teen_MR", "age_month_boys", "low_bmiage_boys")]
    # Save a temp file for merging with PR


########################################################################################################################
### Step 1.7  PR - HOUSEHOLD MEMBER'S RECODE 
########################################################################################################################
# Full household-member (PR) file: the base table every other recode is merged onto.
DataPR <- read_stata(file.path(path_in, "BJPR71FL.DTA"))

# Survey identifiers
DataPR$cty <- "Benin"
DataPR$ccty <- "BEN"
DataPR$year <- "2017-18"
DataPR$survey <- "DHS"
DataPR$ccnum <- 204

### Generate a household unique key variable at the household level using: 
### hv001=cluster number 
### hv002=household number
DataPR$hh_id <- DataPR$hv001 * 10000 + DataPR$hv002
label(DataPR$hh_id) <- "Household ID"
describe(DataPR$hh_id)


### Generate individual unique key variable required for data merging using:
### hv001=cluster number; 
### hv002=household number; 
### hvidx=respondent's line number.
DataPR$ind_id <- DataPR$hv001 * 1000000 + DataPR$hv002 * 100 + DataPR$hvidx
label(DataPR$ind_id) <- "Individual ID"
describe(DataPR$ind_id)

# Sort by household, then by individual within the household.
# The keys are passed to order() as separate arguments: order(c(a, b)) would
# rank one concatenated vector of length 2 * nrow and return row positions that
# do not exist. The result is assigned back so that the sort takes effect.
DataPR <- DataPR[order(DataPR$hh_id, DataPR$ind_id), ]



########################################################################################################################
### 1.8 DATA MERGING
########################################################################################################################

### Merging BR Recode 
#########################################
# Each recode is joined onto the household roster by ind_id with a full outer
# join (all = TRUE); the source table is dropped as soon as it has been merged.
data_merge_1 <- merge(x = DataPR, y = ben18_BR, by = "ind_id", all = TRUE)
rm("ben18_BR")


### Merging IR Recode 
#########################################
data_merge_2 <- merge(x = data_merge_1, y = ben18_IR, by = "ind_id", all = TRUE)
rm("ben18_IR")

# Interviewed women (women_IR) against eligibility for the female interview (hv117)
table(data_merge_2$women_IR, data_merge_2$hv117, useNA = "always")
# Eligible women without an IR record
elig_not_in_IR <- is.na(data_merge_2$women_IR) & data_merge_2$hv117 == 1
table(data_merge_2$ha65[elig_not_in_IR], useNA = "always")
    # Total number of eligible women not interviewed
table(data_merge_2$ha65[elig_not_in_IR],
      data_merge_2$ha13[elig_not_in_IR], useNA = "always")
rm(elig_not_in_IR)


### Merging IR Recode: 15-19 years girls 
#########################################
data_merge_3 <- merge(x = data_merge_2, y = ben18_IR_girls, by = "ind_id", all = TRUE)
rm("ben18_IR_girls")

# Members aged 15-19 in households with hv042 == 1: teen flag against hv117
teen_in_subsample <- data_merge_3$hv105 >= 15 & data_merge_3$hv105 <= 19 & data_merge_3$hv042 == 1
table(data_merge_3$teen_IR[teen_in_subsample],
      data_merge_3$hv117[teen_in_subsample], useNA = "always")
rm(teen_in_subsample)


### Merging MR Recode 
#########################################
data_merge_4 <- merge(x = data_merge_3, y = ben18_MR, by = "ind_id", all = TRUE)
rm("ben18_MR")

# Interviewed men (men_MR) against eligibility for the male interview (hv118)
table(data_merge_4$men_MR, data_merge_4$hv118, useNA = "always")


### Merging MR Recode: 15-19 years boys 
#########################################
data_merge_5 <- merge(x = data_merge_4, y = ben18_MR_boys, by = "ind_id", all = TRUE)
rm("ben18_MR_boys")


### Merging KR Recode 
#########################################
data_merge_6 <- merge(x = data_merge_5, y = ben18_KR, by = "ind_id", all = TRUE)
rm("ben18_KR")

# The fully merged table becomes the working data set; the intermediate merges
# and the raw recodes are no longer needed.
DataFinal <- data_merge_6
rm(list = paste0("data_merge_", 1:5))
rm(list = c("DataBR", "DataIR", "DataKR", "DataMR", "DataPR"))

########################################################################################################################
### Step 1.9 KEEPING ONLY DE JURE HOUSEHOLD MEMBERS                       
########################################################################################################################
# Permanent (de jure) household members 
# hv102 is copied to `resident`; only rows with resident == 1 are kept.
DataFinal$resident <- DataFinal$hv102
describe(DataFinal$resident)
table(DataFinal$resident, useNA = "always")
label(DataFinal$resident) <- "Permanent (de jure) household member"


# Rows where resident is 1; rows with a missing value are dropped as well
DataFinal <- DataFinal[which(DataFinal$resident == 1), ]
table(DataFinal$resident, useNA = "always")
# Note: The Global MPI is based on de jure (permanent) household members only. As such, non-usual residents will be 
# excluded from the sample. In the context of Benin DHS 2017-18, 945 (1.27%) individuals who were non-usual residents 
# were dropped from the sample


########################################################################################################################
### 1.10 CONTROL VARIABLES
########################################################################################################################
# Households are identified as having 'no eligible' members if there are no applicable population, that is, children 0-5 
# years, adult women 15-49 years or men 15-64 years. These households will not have information on relevant indicators of 
# health. As such, these households are considered as non-deprived in those relevant indicators.


### No Eligible Women 15-49 years
#########################################
# Helper: given a 0/1 member-level flag, count the flagged members in each
# household (hh_id) and return 1 for members of households with none, 0 otherwise.
hh_has_none <- function(member_flag) {
  n_in_hh <- ave(member_flag, DataFinal$hh_id, FUN = function(v) sum(v, na.rm = TRUE))
  ifelse(n_in_hh == 0, 1, 0)
}

DataFinal$no_fem_eligible <- hh_has_none(ifelse(DataFinal$hv117 == 1, 1, 0))
    # Takes value 1 if the household had no eligible females for an interview
label(DataFinal$no_fem_eligible) <- "Household has no eligible women"
table(DataFinal$no_fem_eligible, useNA = "always")


### No Eligible Men 15-64 years
#########################################
DataFinal$no_male_eligible <- hh_has_none(ifelse(DataFinal$hv118 == 1, 1, 0))
    # Takes value 1 if the household had no eligible males for an interview
label(DataFinal$no_male_eligible) <- "Household has no eligible man"
table(DataFinal$no_male_eligible, useNA = "always")


### No Eligible Children 0-5 years
#########################################
DataFinal$no_child_eligible <- hh_has_none(ifelse(DataFinal$hv120 == 1, 1, 0))
    # Takes value 1 if there were no eligible children for anthropometrics
label(DataFinal$no_child_eligible) <- "Household has no children eligible"
table(DataFinal$no_child_eligible, useNA = "always")


### No Eligible Women and Men 
#########################################
# NOTE: In the DHS datasets, we use this variable as a control variable for the child mortality indicator if mortality 
# data was collected from women and men. If child mortality was only collected from women, then we use 'no_fem_eligible' 
# as the eligibility criteria 
DataFinal$no_adults_eligible <- ifelse(
  DataFinal$no_fem_eligible == 1 & DataFinal$no_male_eligible == 1, 1, 0
)
    # Takes value 1 if the household had no eligible men & women for an interview
label(DataFinal$no_adults_eligible) <- "Household has no eligible women or men"
table(DataFinal$no_adults_eligible, useNA = "always")


### No Eligible Children and Women  
#########################################
# NOTE: In the DHS datasets, we use this variable as a control variable for the nutrition indicator if nutrition data 
# is present for children and women.
DataFinal$no_child_fem_eligible <- ifelse(
  DataFinal$no_child_eligible == 1 & DataFinal$no_fem_eligible == 1, 1, 0
)
label(DataFinal$no_child_fem_eligible) <- "Household has no children or women eligible"
table(DataFinal$no_child_fem_eligible, useNA = "always")


### No Eligible Women, Men or Children 
#########################################
# NOTE: In the DHS datasets, we use this variable as a control variable for the nutrition indicator if nutrition data 
# is present for children, women and men.
DataFinal$no_eligibles <- ifelse(
  DataFinal$no_fem_eligible == 1 & DataFinal$no_male_eligible == 1 & DataFinal$no_child_eligible == 1, 1, 0
)
label(DataFinal$no_eligibles) <- "Household has no eligible women, men, or children"
table(DataFinal$no_eligibles, useNA = "always")


### No Eligible Subsample 
#########################################
# hv042 (household selected for hemoglobin) is essentially a variable that indicates whether there is selection of a 
# subsample for anthropometric data.	
DataFinal$no_hem_eligible <- hh_has_none(ifelse(DataFinal$hv042 == 1, 1, 0))
    # Takes value 1 if the household had no member with hv042 == 1
label(DataFinal$no_hem_eligible) <- "Household has no eligible individuals for hemoglobin measurements"
table(DataFinal$no_hem_eligible, useNA = "always")


# The member-level flags and household counts were never stored as columns,
# so only the helper has to be cleaned up.
rm(hh_has_none)


########################################################################################################################
### 1.11 SUBSAMPLE VARIABLE 
########################################################################################################################
# In Benin DHS 2017-18, height and weight measurements were collected from children (0-5) in 100% of sample and for 
# women (15-49) in 50% of the sample. We use 100% of sample for MPI.
DataFinal$subsample <- 1
label(DataFinal$subsample) <- "Households selected as part of nutrition subsample" 
table(DataFinal$subsample, useNA = "always")


########################################################################################################################
### 1.12 RENAMING DEMOGRAPHIC VARIABLES ***
########################################################################################################################
# Sample weight
describe(DataFinal$hv005)
DataFinal$weight <- DataFinal$hv005 
label(DataFinal$weight) <- "Sample weight"


# Area: urban or rural	
describe(DataFinal$hv025)
str(DataFinal$hv025)
table(DataFinal$hv025, useNA = "always")
DataFinal$area <- DataFinal$hv025  
DataFinal$area[DataFinal$area==2] <- 0  
DataFinal$area <- factor(DataFinal$area,
                             levels = c(0,1),
                             labels = c("rural", "urban")) 
label(DataFinal$area) <- "Area: urban-rural"


# Relationship to the household head: the detailed hv101 codes are collapsed
# into five categories before the variable is turned into a factor
DataFinal[["relationship"]] <- DataFinal[["hv101"]]
describe(DataFinal$relationship)
table(DataFinal[["relationship"]], useNA = "always")
# Codes 11 and 14 are merged into code 3 ("child")
DataFinal[["relationship"]][DataFinal[["relationship"]] %in% c(11, 14)] <- 3
# Codes 4 to 10 become "extended family"
DataFinal[["relationship"]][DataFinal[["relationship"]] <= 10 & DataFinal[["relationship"]] >= 4] <- 4
# Codes 12 and 13 become "not related"
DataFinal[["relationship"]][DataFinal[["relationship"]] %in% c(12, 13)] <- 5
# Any code outside 1-5 (left untouched above) turns into NA in the factor
DataFinal[["relationship"]] <- factor(
  DataFinal[["relationship"]],
  levels = c(1, 2, 3, 4, 5),
  labels = c("head", "spouse", "child", "extended family", "not related")
)
label(DataFinal[["relationship"]]) <- "Relationship to the head of household"
table(DataFinal[["hv101"]], DataFinal[["relationship"]], useNA = "always")


# Sex of household member: copied unchanged from hv104
describe(DataFinal$hv104)
table(DataFinal[["hv104"]], useNA = "always")
DataFinal[["sex"]] <- DataFinal[["hv104"]]
label(DataFinal[["sex"]]) <- "Sex of household member"


# Age of household member: hv105 values of 98 and above are set to missing
describe(DataFinal$hv105)
table(DataFinal[["hv105"]], useNA = "always")
DataFinal[["age"]] <- DataFinal[["hv105"]]
DataFinal[["age"]][DataFinal[["age"]] >= 98] <- NA
label(DataFinal[["age"]]) <- "Age of household member"


# Age groups: each band is described by an inclusive [lower, upper] pair; the
# band number is written to every member whose age falls inside the pair.
# Members with missing age keep NA.
agec7_lower <- c(0, 5, 10, 15, 18, 60)
agec7_upper <- c(4, 9, 14, 17, 59, Inf)
for (band in c(1, 2, 3, 4, 5, 6)) {
  DataFinal$agec7[DataFinal$age >= agec7_lower[band] & DataFinal$age <= agec7_upper[band]] <- band
}
DataFinal[["agec7"]] <- factor(
  DataFinal[["agec7"]],
  levels = c(1, 2, 3, 4, 5, 6),
  labels = c("0-4", "5-9", "10-14", "15-17", "18-59", "60+")
)
label(DataFinal[["agec7"]]) <- "age groups (7 groups)"

agec4_lower <- c(0, 10, 18, 60)
agec4_upper <- c(9, 17, 59, Inf)
for (band in c(1, 2, 3, 4)) {
  DataFinal$agec4[DataFinal$age >= agec4_lower[band] & DataFinal$age <= agec4_upper[band]] <- band
}
DataFinal[["agec4"]] <- factor(
  DataFinal[["agec4"]],
  levels = c(1, 2, 3, 4),
  labels = c("0-9", "10-17", "18-59", "60+")
)
label(DataFinal[["agec4"]]) <- "age groups (4 groups)"
# Drop the helper objects used to build the bands
rm(agec7_lower, agec7_upper, agec4_lower, agec4_upper, band)


# Marital status of household member, derived from hv115.
# The recodes must run in this order: 1 -> 2 first, then 0 -> 1, otherwise the
# rows moved to 1 would be moved again to 2.
DataFinal[["marital"]] <- DataFinal[["hv115"]]
describe(DataFinal$marital)
table(DataFinal[["marital"]], useNA = "always")
DataFinal[["marital"]][DataFinal[["marital"]] %in% 1] <- 2
DataFinal[["marital"]][DataFinal[["marital"]] %in% 0] <- 1
# Code 8 is treated as missing
DataFinal[["marital"]][DataFinal[["marital"]] %in% 8] <- NA
DataFinal[["marital"]] <- factor(
  DataFinal[["marital"]],
  levels = c(1, 2, 3, 4, 5),
  labels = c("never married", "currently married", "widowed", "divorced", "not living together")
)
label(DataFinal[["marital"]]) <- "Marital status of household member"
table(DataFinal[["hv115"]], DataFinal[["marital"]], useNA = "always")


# Household size: number of rows (members) that share the same hh_id
DataFinal[["member"]] <- 1
DataFinal[["hhsize"]] <- ave(
  DataFinal[["member"]],
  DataFinal[["hh_id"]],
  FUN = function(ones) sum(ones, na.rm = TRUE)
)
label(DataFinal[["hhsize"]]) <- "Household size"
table(DataFinal[["hhsize"]], useNA = "always")
# The counting helper column is no longer needed
DataFinal[["member"]] <- NULL


# Subnational region: copied unchanged from hv024
lookfor(DataFinal, "region")
describe(DataFinal$hv024)
table(DataFinal[["hv024"]], useNA = "always")
DataFinal[["region"]] <- DataFinal[["hv024"]]
label(DataFinal[["region"]]) <- "Region for subnational decomposition"

table(DataFinal[["hv024"]], DataFinal[["region"]], useNA = "always")


########################################################################################################################
###  Step 2 Data preparation  
###  Standardization of the 10 Global MPI indicators 
###  Identification of non-deprived & deprived individuals  
########################################################################################################################

########################################################################################################################
### Step 2.1 Years of Schooling 
########################################################################################################################
# Step 2.1 builds three things:
#   eduyears        - cleaned years of education per member (from hv108)
#   no_missing_edu  - 1 if at least 2/3 of household members aged 10+ have
#                     non-missing eduyears, 0 otherwise
#   hh_years_edu6   - 1 if any member of the household has 6+ years of
#                     education, 0 if none has, NA if this cannot be established
describe(DataFinal$hv108)
table(DataFinal$hv108, useNA = "always")
DataFinal$eduyears <- DataFinal$hv108
    # total number of years of education
DataFinal$eduyears[DataFinal$eduyears > 30] <- NA
    # recode any unreasonable years of highest education as missing value
DataFinal$eduyears[DataFinal$eduyears >= DataFinal$age & DataFinal$age > 0] <- NA
    # years of education that equal or exceed the member's age are not credible
DataFinal$eduyears[DataFinal$age < 10] <- 0
# The variable "eduyears" was replaced with a '0' given that the criteria for this indicator is household member aged
# 10 years or older


# A control variable is created on whether there is information on years of education for at least 2/3 of the household
# members.
# No sorting by hh_id is needed here: ave() groups the rows by hh_id whatever their order.
DataFinal$temp[!is.na(DataFinal$age) & DataFinal$age >= 10] <- 1
DataFinal$temp[is.na(DataFinal$eduyears)] <- NA
DataFinal$no_missing_edu <- ave(DataFinal$temp, DataFinal$hh_id, FUN = function(x) sum(x, na.rm = TRUE))
    # Total household members who are 10 years and older with no missing years of education
DataFinal$temp2[DataFinal$age >= 10 & !is.na(DataFinal$age)] <- 1
DataFinal$hhs <- ave(DataFinal$temp2, DataFinal$hh_id, FUN = function(x) sum(x, na.rm = TRUE))
    # Total number of household members who are 10 years and older
DataFinal$no_missing_edu <- (DataFinal$no_missing_edu) / (DataFinal$hhs)
    # 0/0 (no member aged 10+) gives NaN, which the next line turns into NA
DataFinal$no_missing_edu <- ifelse(DataFinal$no_missing_edu >= 2/3, 1, 0)
    # Identify whether there is information on years of education for at least 2/3 of the household members aged 10
    # years and older
table(DataFinal$no_missing_edu, useNA = "always")
label(DataFinal$no_missing_edu) <- "No missing edu for at least 2/3 of the HH members aged 10 years & older"
DataFinal <- DataFinal[!names(DataFinal) %in% c("temp", "temp2", "hhs")]

# The entire household is considered deprived if no household member aged 10 years or older has completed SIX years of
# schooling.

DataFinal$years_edu6 <- ifelse(DataFinal$eduyears >= 6, 1, 0)
# The years of schooling indicator takes a value of "1" if at least someone in the hh has reported 6 years of education
# or more
DataFinal$years_edu6[is.na(DataFinal$eduyears)] <- NA
# Household maximum. max(x, na.rm = TRUE) returns -Inf (not NA) when every value in the household is missing, which
# would then be coded as "deprived" below; such households are given NA explicitly instead.
DataFinal$hh_years_edu6_1 <- ave(DataFinal$years_edu6, DataFinal$hh_id,
                                 FUN = function(x) if (all(is.na(x))) NA_real_ else max(x, na.rm = TRUE))
DataFinal$hh_years_edu6 <- ifelse(DataFinal$hh_years_edu6_1 == 1, 1, 0)
DataFinal$hh_years_edu6[is.na(DataFinal$hh_years_edu6_1)] <- NA
DataFinal$hh_years_edu6[DataFinal$hh_years_edu6 == 0 & DataFinal$no_missing_edu == 0] <- NA
    # A household initially coded as deprived is set to missing when fewer than 2/3 of its members aged 10+ have
    # education information, because deprivation cannot be established conclusively
label(DataFinal$hh_years_edu6) <- "Household has at least one member with 6 years of edu"


########################################################################################################################
### Step 2.2 Child School Attendance 
########################################################################################################################
# Step 2.2 builds:
#   attendance          - member-level school attendance (from hv121), 0/1/NA
#   child_schoolage     - 1 if the member is aged 6-14
#   no_missing_atten    - 1 if at least 2/3 of the household's school-age children have attendance information
#   hh_child_atten      - 1 if all school-age children attend school (or the household has none), 0 if any does not,
#                         NA if this cannot be established
describe(DataFinal$hv121)
table(DataFinal$hv121, useNA = "always")
DataFinal$attendance <- DataFinal$hv121
DataFinal$attendance[DataFinal$attendance == 2] <- 1
    # hv121 code 2 is counted together with code 1 (attending)
describe(DataFinal$attendance)
table(DataFinal$attendance, useNA = "always")

DataFinal$attendance[(DataFinal$attendance == 9 | is.na(DataFinal$attendance)) & DataFinal$hv109 == 0] <- 0
      # In some countries, they don't assess attendance for those with no educational attainment. These are replaced with
      # a '0'
DataFinal$attendance[DataFinal$attendance == 9 & DataFinal$hv109 != 0] <- NA
      # Replace missing values


### Old & New Standard MPI
###############################################################################
# The entire household is considered deprived if any school-aged child is not attending school up to class 8.
DataFinal$child_schoolage <- ifelse(DataFinal$age >= 6 & DataFinal$age <= 14, 1, 0)
    # The age range used is 6-14 (= 6 + 8 classes).
    # NOTE(review): the accompanying note in this script gave 7 as the official entrance age in Benin, which does not
    # match the range coded here - confirm against http://data.uis.unesco.org/?ReportId=163


# A control variable is created on whether there is information on school attendance for at least 2/3 of the school
# age children
sum(DataFinal$child_schoolage == 1 & is.na(DataFinal$attendance), na.rm = TRUE)
    # Number of school-age children whose attendance information is missing
DataFinal$temp <- ifelse(DataFinal$child_schoolage == 1 & !is.na(DataFinal$attendance), 1, 0)
    # 1 for school-age children WITH attendance information. Both conditions must hold: counting every member with
    # missing attendance as well would make the numerator at least as large as the denominator and the control
    # variable would always equal 1.
DataFinal$no_missing_atten <- ave(DataFinal$temp, DataFinal$hh_id, FUN = function(x) sum(x, na.rm = TRUE))
    # Total school age children with no missing information on school attendance
DataFinal$temp2 <- ifelse(DataFinal$child_schoolage == 1, 1, 0)
DataFinal$hhs <- ave(DataFinal$temp2, DataFinal$hh_id, FUN = function(x) sum(x, na.rm = TRUE))
    # Total number of household members who are of school age
DataFinal$no_missing_atten <- (DataFinal$no_missing_atten) / (DataFinal$hhs)
DataFinal$no_missing_atten <- ifelse(DataFinal$no_missing_atten >= 2/3, 1, 0)
DataFinal$no_missing_atten[is.na(DataFinal$no_missing_atten)] <- 1
    # Households without school-age children (0/0 above) are treated as having no missing information
table(DataFinal$no_missing_atten, useNA = "always")
label(DataFinal$no_missing_atten) <- "No missing school attendance for at least 2/3 of the school aged children"
DataFinal <- DataFinal[!names(DataFinal) %in% c("temp", "temp2", "hhs")]


DataFinal$hh_children_schoolage <- ave(DataFinal$child_schoolage, DataFinal$hh_id,
                                       FUN = function(x) sum(x, na.rm = TRUE))
DataFinal$hh_children_schoolage <- ifelse(DataFinal$hh_children_schoolage > 0, 1, 0)
    # Control variable: It takes value 1 if the household has children in school age
label(DataFinal$hh_children_schoolage) <- "Household has children in school age"


DataFinal$child_not_atten <- ifelse(DataFinal$attendance == 0 & DataFinal$child_schoolage == 1, 1, 0)
DataFinal$child_not_atten[is.na(DataFinal$attendance) & DataFinal$child_schoolage == 1] <- NA
# Household maximum. max(x, na.rm = TRUE) returns -Inf (not NA) when every value in the household is missing, which
# would be coded as "deprived" on the next line; such households are given NA explicitly instead.
DataFinal$any_child_not_atten <- ave(DataFinal$child_not_atten, DataFinal$hh_id,
                                     FUN = function(x) if (all(is.na(x))) NA_real_ else max(x, na.rm = TRUE))
DataFinal$hh_child_atten <- ifelse(DataFinal$any_child_not_atten == 0, 1, 0)
DataFinal$hh_child_atten[is.na(DataFinal$any_child_not_atten)] <- NA
DataFinal$hh_child_atten[DataFinal$hh_children_schoolage == 0] <- 1
    # Households without school-age children are non-deprived
DataFinal$hh_child_atten[DataFinal$hh_child_atten == 1 & DataFinal$no_missing_atten == 0] <- NA
    # If the household has been initially identified as non-deprived, but attendance information is available for
    # fewer than 2/3 of its school-age children, the household is set to missing because there is insufficient
    # information to conclude that it is not deprived
label(DataFinal$hh_child_atten) <- "Household has all school age children up to class 8 in school"
table(DataFinal$hh_child_atten, useNA = "always")

# Note: The indicator takes value 1 if ALL children in school age are attending school and 0 if there is at least one 
# child not attending. Households with no children receive a value of 1 as non-deprived. The indicator has a missing 
# value only when there are all missing values on children attendance in households that have children in school age. 
  
  
########################################################################################################################
### Step 2.3 Nutrition 
########################################################################################################################

########################################################################################################################
### Step 2.3a Adult Nutrition 
########################################################################################################################
# Note: Benin DHS 2017-18 does not have anthropometric data for men 
# Search the variable names/labels for the anthropometric variables and inspect ha40, which is used further below as
# the source of women's BMI.
lookfor(DataFinal, "body")  
lookfor(DataFinal, "mass")
describe(DataFinal$ha40)


### ELIGIBILITY FOR BMI ###

### WOMEN
##############################################
# A member is flagged as eligible for BMI when ha13 is non-missing and her age is not above 49.
# NOTE(review): ha13 is presumably the DHS "result of measurement" variable - confirm in the recode manual.
DataFinal$fem_eligible_bmi <- ifelse(!is.na(DataFinal$ha13),1,0)
DataFinal$fem_eligible_bmi[DataFinal$age>49 & !is.na(DataFinal$age)] <- 0  
DataFinal$hh_n_fem_eligible_bmi <- ave(DataFinal$fem_eligible_bmi, DataFinal$hh_id, FUN = function(x) sum(x,na.rm=T))
    # Number of eligible women for BMI in the hh
DataFinal$no_fem_eligible_bmi <- ifelse(DataFinal$hh_n_fem_eligible_bmi==0,1,0)
    # Takes value 1 if the household has no woman flagged as eligible for BMI
label(DataFinal$no_fem_eligible_bmi) <- "Household has no eligible women"
table(DataFinal$no_fem_eligible_bmi, useNA = "always")


### No Eligible Women or Children for BMI
##############################################
# NOTE: In the DHS datasets, we use this variable as a control variable for the nutrition indicator if nutrition data 
# is present for children, women and men. 
# The flag is 1 only when the household has neither an eligible woman nor an eligible child; no_child_eligible is
# created earlier in the script.
DataFinal$no_eligibles_bmi <- ifelse(DataFinal$no_fem_eligible_bmi==1 & DataFinal$no_child_eligible==1,1,0)
label(DataFinal$no_eligibles_bmi) <- "Household has no eligible women or children for BMI"
table(DataFinal$no_eligibles_bmi, useNA = "always")


### BMI Indicator for Women 15-49 years 
############################################## 
# Women's BMI is ha40 divided by 100.
# NOTE(review): presumably ha40 is stored with two implied decimals - confirm in the recode manual.
DataFinal$f_bmi <- DataFinal$ha40/100
    # BMI of women 15-49 years	
label(DataFinal$f_bmi) <- "Women's BMI"

# Member-level flag: 1 if BMI is below 18.5, 0 otherwise.
DataFinal$f_low_bmi <- ifelse(DataFinal$f_bmi<18.5,1,0)
# Missing BMI and values of 99.90 or more are set to missing.
# NOTE(review): the 99.90 cut-off presumably removes DHS flag codes - confirm.
DataFinal$f_low_bmi[is.na(DataFinal$f_bmi) | DataFinal$f_bmi>=99.90] <- NA
# Women older than 49 are outside the age range used for this indicator.
DataFinal$f_low_bmi[DataFinal$age>49 & !is.na(DataFinal$age)] <- NA
label(DataFinal$f_low_bmi) <- "BMI of women < 18.5"

# Household maximum of the member flag. For a household in which every value is missing, max(x, na.rm=T) returns -Inf;
# because only the values 0 and 1 are copied into low_bmi below, those households end up with low_bmi = NA.
DataFinal$temp <- ave(DataFinal$f_low_bmi, DataFinal$hh_id, FUN = function(x) max(x,na.rm=T))
DataFinal$low_bmi[DataFinal$temp== 0] <- 0
DataFinal$low_bmi[DataFinal$temp== 1] <- 1

DataFinal$hh_no_low_bmi <- ifelse(DataFinal$low_bmi==0,1,0)
    # Under this section, households take a value of '1' if no women in the household has low bmi
DataFinal$hh_no_low_bmi[is.na(DataFinal$low_bmi)] <- NA
    # Under this section, households take a value of NA if there is no information from eligible women
DataFinal$hh_no_low_bmi[DataFinal$no_fem_eligible_bmi==1] <- 1
    # Under this section, households that don't have eligible female population are identified as non-deprived in 
    # nutrition. 
# Remove the two helper columns; low_bmi is re-created in the BMI-for-age section.
DataFinal$temp <- NULL
DataFinal$low_bmi <- NULL
label(DataFinal$hh_no_low_bmi) <- "Household has no adult with low BMI"
table(DataFinal$hh_no_low_bmi, useNA = "always")
    # Figures are exclusively based on information from eligible adult women (15-49 years)
    # Figures are exclusively based on information from eligible adult women (15-49 years)



### BMI Indicator for Men not collected 
############################################## 
# Men's BMI was not collected in this survey. The two columns are created as all-NA placeholders so that the code
# below, which refers to m_low_bmi, still runs.
DataFinal$m_bmi <- NA
label(DataFinal$m_bmi) <- "Male's BMI "

DataFinal$m_low_bmi <- NA
label(DataFinal$m_low_bmi) <- "BMI of male < 18.5"



### BMI-for-age for individuals 15-19 years and BMI for individuals 20-49 years 
##############################################
# Member-level flag, built by successive overwrites. The order of the statements matters: start at 0, switch to 1 for
# low adult BMI, override with the BMI-for-age result where available, then blank out members with no information.
DataFinal$low_bmi_byage <- 0
label(DataFinal$low_bmi_byage) <- "Individuals with low BMI or BMI-for-age"

DataFinal$low_bmi_byage[DataFinal$f_low_bmi==1] <- 1
    # Replace variable "low_bmi_byage = 1" if eligible women have low BMI
# Note: The following command will result in 0 changes when there is no BMI information from men
# (m_low_bmi is NA for every row here, so the condition is never TRUE)

DataFinal$low_bmi_byage[DataFinal$low_bmi_byage==0 & DataFinal$m_low_bmi==1] <- 1 
    # Replace variable "low_bmi_byage = 1" if eligible men have low BMI


# Note: The following command replaces BMI with BMI-for-age for those between the age group of 15-19 by their age in 
# months where information is available 
# low_bmiage and age_month are created earlier in the script.

# Replacement for girls: 
DataFinal$low_bmi_byage[DataFinal$low_bmiage==1 & !is.na(DataFinal$age_month)] <- 1
DataFinal$low_bmi_byage[DataFinal$low_bmiage==0 & !is.na(DataFinal$age_month)] <- 0

# Note: The following control variable is applied when there is BMI information for women and men, as well as 
# BMI-for-age for teenagers 
# Members with neither an adult BMI flag nor a BMI-for-age flag are set to missing.
DataFinal$low_bmi_byage[is.na(DataFinal$f_low_bmi) & is.na(DataFinal$low_bmiage)] <- NA
# Household maximum; -Inf (all values missing in the household) is neither 0 nor 1, so low_bmi stays NA for it.
DataFinal$temp <- ave(DataFinal$low_bmi_byage, DataFinal$hh_id, FUN = function(x) max(x,na.rm=T))
DataFinal$low_bmi[DataFinal$temp==1] <- 1
DataFinal$low_bmi[DataFinal$temp==0] <- 0
DataFinal$hh_no_low_bmiage <- ifelse(DataFinal$low_bmi==0,1,0)
    # Households take a value of '1' if all eligible adults and teenagers in the household has normal bmi or 
    # bmi-for-age 

DataFinal$hh_no_low_bmiage[is.na(DataFinal$low_bmi)] <- NA
    # Households take a value of NA if there is no information from eligible individuals in the household 

DataFinal$hh_no_low_bmiage[DataFinal$no_fem_eligible_bmi==1] <- 1 
    # Households take a value of '1' if there is no eligible population.
# Remove the helper columns.
DataFinal$temp <- NULL
DataFinal$low_bmi <- NULL
label(DataFinal$hh_no_low_bmiage) <- "Household has no adult with low BMI or BMI-for-age"
# Compare the BMI-only and the BMI / BMI-for-age household indicators.
table(DataFinal$hh_no_low_bmi[DataFinal$subsample==1], useNA = "always")	
table(DataFinal$hh_no_low_bmiage[DataFinal$subsample==1], useNA = "always")

# NOTE: hh_no_low_bmi takes the value 1 if (a) no eligible adult in the household has an (observed) low BMI, or (b)
# there are no eligible adults in the household. When adapting this script to another survey, check and adjust it so
# that all people who are eligible and/or measured are included. It is particularly important to check whether men
# were measured and which age groups were measured among men and women. The variable takes the value 0 for households
# that have at least one adult with an observed low BMI. It is missing only when BMI information is missing for ALL
# eligible adults in the household.



########################################################################################################################
### Step 2.3b Child Nutrition 
########################################################################################################################

### Child Underweight Indicator 
############################################## 
# Worst underweight status observed among the children of each household.
# For a household with no observed value the maximum is -Inf, which matches neither 0 nor 1 below and therefore
# yields NA.
hh_max_underweight <- ave(DataFinal[["underweight"]], DataFinal[["hh_id"]],
                          FUN = function(v) max(v, na.rm = TRUE))
# 1 = no child in the household is underweight, 0 = at least one child is, NA = no information
DataFinal[["hh_no_underweight"]] <- ifelse(
  hh_max_underweight %in% 0, 1,
  ifelse(hh_max_underweight %in% 1, 0, NA)
)
# Households without eligible children are counted as non-deprived
DataFinal[["hh_no_underweight"]][DataFinal[["no_child_eligible"]] == 1] <- 1
label(DataFinal[["hh_no_underweight"]]) <- "Household has no child underweight - 2 stdev"
rm(hh_max_underweight)
table(DataFinal[["hh_no_underweight"]], useNA = "always")


### Child Stunting Indicator 
############################################## 
# Worst stunting status observed among the children of each household.
# For a household with no observed value the maximum is -Inf, which matches neither 0 nor 1 below and therefore
# yields NA.
hh_max_stunting <- ave(DataFinal[["stunting"]], DataFinal[["hh_id"]],
                       FUN = function(v) max(v, na.rm = TRUE))
# 1 = no child in the household is stunted, 0 = at least one child is, NA = no information
DataFinal[["hh_no_stunting"]] <- ifelse(
  hh_max_stunting %in% 0, 1,
  ifelse(hh_max_stunting %in% 1, 0, NA)
)
# Households without eligible children are counted as non-deprived
DataFinal[["hh_no_stunting"]][DataFinal[["no_child_eligible"]] == 1] <- 1
label(DataFinal[["hh_no_stunting"]]) <- "Household has no child stunted - 2 stdev"
rm(hh_max_stunting)
table(DataFinal[["hh_no_stunting"]], useNA = "always")


### Child Either Stunted or Underweight Indicator 
############################################## 
# Child-level flag: 1 if the child is stunted or underweight, 0 if neither, NA otherwise
child_stunted <- DataFinal[["stunting"]]
child_underweight <- DataFinal[["underweight"]]
DataFinal$uw_st[child_stunted == 1 | child_underweight == 1] <- 1
DataFinal$uw_st[child_stunted == 0 & child_underweight == 0] <- 0
DataFinal$uw_st[is.na(child_stunted) & is.na(child_underweight)] <- NA

# Worst status observed in each household; -Inf (no observed value) matches neither 0 nor 1 below and yields NA
hh_max_uw_st <- ave(DataFinal[["uw_st"]], DataFinal[["hh_id"]],
                    FUN = function(v) max(v, na.rm = TRUE))
# 1 = no child in the household is underweight or stunted, 0 = at least one child is, NA = no information
DataFinal[["hh_no_uw_st"]] <- ifelse(
  hh_max_uw_st %in% 0, 1,
  ifelse(hh_max_uw_st %in% 1, 0, NA)
)
# Households without eligible children are counted as non-deprived
DataFinal[["hh_no_uw_st"]][DataFinal[["no_child_eligible"]] == 1] <- 1
label(DataFinal[["hh_no_uw_st"]]) <- "Household has no child underweight or stunted"
rm(child_stunted, child_underweight, hh_max_uw_st)
table(DataFinal[["hh_no_uw_st"]], useNA = "always")


########################################################################################################################
### Step 2.3c Household Nutrition Indicator 
########################################################################################################################
# Household nutrition indicator, combining the adult (BMI / BMI-for-age) and child (underweight or stunted)
# indicators. The statements overwrite one another, so their order matters.
# Step 1 - non-deprived (1) when:
#   (a) both indicators are 1, or
#   (b) the adult indicator is missing, the child indicator is 1 and the household has eligible children, or
#   (c) the child indicator is missing, the adult indicator is 1 and the household has eligible women.
DataFinal$hh_nutrition_uw_st[(DataFinal$hh_no_low_bmiage==1 & DataFinal$hh_no_uw_st==1) |
                            (is.na(DataFinal$hh_no_low_bmiage) & DataFinal$hh_no_uw_st==1 & DataFinal$no_child_eligible==0) | 
                              (DataFinal$hh_no_low_bmiage==1 & is.na(DataFinal$hh_no_uw_st) & DataFinal$no_fem_eligible_bmi==0)] <- 1
# Step 2 - deprived (0) when either indicator is 0.
DataFinal$hh_nutrition_uw_st[DataFinal$hh_no_low_bmiage==0 | DataFinal$hh_no_uw_st==0] <- 0
# Step 3 - missing when both indicators are missing.
DataFinal$hh_nutrition_uw_st[is.na(DataFinal$hh_no_low_bmiage) & is.na(DataFinal$hh_no_uw_st)] <- NA
# Step 4 - households with no eligible women and no eligible children are non-deprived.
DataFinal$hh_nutrition_uw_st[DataFinal$no_eligibles_bmi==1] <- 1
    # If country have collected anthropometric data from women, child 0-5 & a subsample of men, we only replace households 
    # which do not have any of these three applicable population as non-deprived
label(DataFinal$hh_nutrition_uw_st) <- "Household has no child underweight/stunted or adult deprived by BMI/BMI-for-age"
table(DataFinal$hh_nutrition_uw_st, useNA = "always")


########################################################################################################################
### Step 2.4 Child Mortality 
########################################################################################################################
# Inspect the four source variables for child deaths.
describe(DataFinal$v206)
describe(DataFinal$v207)
describe(DataFinal$mv206)
describe(DataFinal$mv207)
    # v206 or mv206: number of sons who have died 
    # v207 or mv207: number of daughters who have died

# Total child mortality reported by eligible women
# Member level: sons + daughters who died. rowSums() is called without na.rm, so the sum is NA when either count is
# missing.
DataFinal$temp_f <- rowSums(DataFinal[c("v206", "v207")])
# Members with v201 equal to 0 are given 0 deaths.
# NOTE(review): v201 is presumably the total number of children ever born - confirm in the recode manual.
DataFinal$temp_f[DataFinal$v201==0] <- 0
# Household total; with na.rm=TRUE a household whose members are all missing gets 0 here.
DataFinal$child_mortality_f <- ave(DataFinal$temp_f, DataFinal$hh_id, FUN = function(x) sum(x,na.rm=TRUE))
# temp_miss_f is 1 where the member-level total is available; its household maximum is 0 only when no member of the
# household has a value.
DataFinal$temp_miss_f <- 1
DataFinal$temp_miss_f[is.na(DataFinal$temp_f)] <- 0
DataFinal$child_mortality_temp_miss_f <- ave(DataFinal$temp_miss_f, DataFinal$hh_id, FUN = function(x) max(x,na.rm=TRUE))
# Turn the artificial 0 back into NA for households where nobody reported.
DataFinal$child_mortality_f[DataFinal$child_mortality_f==0 & DataFinal$child_mortality_temp_miss_f==0 & is.na(DataFinal$temp_f) & is.na(DataFinal$v206) &  is.na(DataFinal$v207)] <- NA
label(DataFinal$child_mortality_f) <- "Occurrence of child mortality reported by women"
table(DataFinal$child_mortality_f, useNA = "always")
# Remove the helper columns.
DataFinal$temp_f <- NULL
DataFinal$temp_miss_f <- NULL
DataFinal$child_mortality_temp_miss_f <- NULL

# Total child mortality reported by eligible men	
# Same construction as for women, using mv206, mv207 and mv201.
DataFinal$temp_m <- rowSums(DataFinal[c("mv206", "mv207")])
DataFinal$temp_m[DataFinal$mv201==0] <- 0
DataFinal$child_mortality_m <- ave(DataFinal$temp_m, DataFinal$hh_id, FUN = function(x) sum(x,na.rm=TRUE))
DataFinal$temp_miss_m <- 1
DataFinal$temp_miss_m [is.na(DataFinal$temp_m)] <- 0
DataFinal$child_mortality_temp_miss_m <- ave(DataFinal$temp_miss_m, DataFinal$hh_id, FUN = function(x) max(x,na.rm=TRUE))
DataFinal$child_mortality_m[DataFinal$child_mortality_m==0 & DataFinal$child_mortality_temp_miss_m==0 & is.na(DataFinal$temp_m) & is.na(DataFinal$mv206) &  is.na(DataFinal$mv207)] <- NA
label(DataFinal$child_mortality_m) <- "Occurrence of child mortality reported by men"
table(DataFinal$child_mortality_m, useNA = "always")
DataFinal$temp_m <- NULL
DataFinal$temp_miss_m <- NULL
DataFinal$child_mortality_temp_miss_m <- NULL

# Row-wise maximum of the women's and men's household totals. When both are NA, max(..., na.rm=TRUE) returns -Inf;
# the next line converts that negative value to NA.
DataFinal$child_mortality <- apply(DataFinal[c("child_mortality_f", "child_mortality_m")], 1, max, na.rm=TRUE)
DataFinal$child_mortality[DataFinal$child_mortality<0] <- NA
label(DataFinal$child_mortality) <- "Total child mortality within household reported by women & men"
table(DataFinal$child_mortality[DataFinal$subsample==1], useNA = "always")	


# Deprived if any children died in the household 
##############################################
# 1 = the household reported no child death, 0 = at least one death, NA = no information
DataFinal[["hh_mortality"]] <- ifelse(DataFinal[["child_mortality"]] %in% 0, 1, 0)
DataFinal[["hh_mortality"]][is.na(DataFinal[["child_mortality"]])] <- NA
# Households with no eligible adults are counted as non-deprived.
    # Change eligibility to "no_fem_eligible==1" if child mortality indicator is constructed solely using information
    # from women
DataFinal[["hh_mortality"]][DataFinal[["no_adults_eligible"]] == 1] <- 1
label(DataFinal[["hh_mortality"]]) <- "Household had no child mortality"
table(DataFinal[["hh_mortality"]][DataFinal[["subsample"]] == 1], useNA = "always")


# Deprived if any children died in the household in the last 5 years from the survey year 
##############################################
table(DataFinal$child_died_per_wom_5y, useNA = "always")
    # The 'child_died_per_wom_5y' variable was constructed in Step 1.2 using information from individual women who ever 
    # gave birth in the BR file. The missing values represent eligible women who have never given birth and so are
    # not present in the BR file. But these 'missing women' may be living in households where there are other women with
    # child mortality information from the BR file. So at this stage, it is important that we aggregate the information 
    # that was obtained from the BR file at the household level. This ensures that women who were not present in the BR 
    # file are assigned a value, following the information provided by other women in the household

DataFinal$child_died_per_wom_5y[DataFinal$v201==0] <- 0 
    # Assign a value of "0" for:
    # - all eligible women who never ever gave birth 
DataFinal$child_died_per_wom_5y[DataFinal$no_fem_eligible==1] <- 0
    # Assign a value of "0" for:
    # - individuals living in households that have no eligible women 	

# Household total of deaths in the last 5 years; with na.rm=TRUE a household whose members are all missing gets 0.
DataFinal$temp_child_mortality_5y <- ave(DataFinal$child_died_per_wom_5y, DataFinal$hh_id, FUN = function(x) sum(x,na.rm=TRUE)) 
# The helper is 1 where the member-level value is available; its household maximum is 0 only when no member of the
# household has a value.
DataFinal$temp_child_mortality_5y_miss <- 1
DataFinal$temp_child_mortality_5y_miss[is.na(DataFinal$child_died_per_wom_5y)] <- 0
DataFinal$child_mortality_5y_miss <- ave(DataFinal$temp_child_mortality_5y_miss, DataFinal$hh_id, FUN = function(x) max(x,na.rm=TRUE))
# Turn the artificial 0 back into NA for households without any information.
DataFinal$temp_child_mortality_5y[DataFinal$temp_child_mortality_5y==0 & DataFinal$child_mortality_5y_miss==0] <- NA
DataFinal$child_mortality_5y <- DataFinal$temp_child_mortality_5y
DataFinal$child_mortality_5y[is.na(DataFinal$temp_child_mortality_5y) & DataFinal$child_mortality==0] <- 0
    # Replace all households as 0 death if women has missing value and men reported no death in those households
# Note: the helper columns temp_child_mortality_5y, temp_child_mortality_5y_miss and child_mortality_5y_miss are not
# removed in this section.

label(DataFinal$child_mortality_5y) <- "Total child mortality within household past 5 years reported by women"
table(DataFinal$child_mortality_5y[DataFinal$subsample==1], useNA = "always")

# The new standard MPI indicator takes a value of "1" if eligible women within the household reported no child mortality
# or if any child died longer than 5 years from the survey year. The indicator takes a value of "0" if women in the 
# household reported any child mortality in the last 5 years from the survey year. Households were replaced with a value
# of "1" if eligible men within the household reported no child mortality in the absence of information from women. The 
# indicator takes a missing value if there was missing information on reported death from eligible individuals.

DataFinal$hh_mortality_5y <- ifelse(DataFinal$child_mortality_5y==0,1,0)
DataFinal$hh_mortality_5y[is.na(DataFinal$child_mortality_5y)] <- NA
table(DataFinal$hh_mortality_5y[DataFinal$subsample==1], useNA = "always")	
label(DataFinal$hh_mortality_5y) <- "Household had no child mortality in the last 5 years"


########################################################################################################################
### Step 2.5 Electricity 
########################################################################################################################
# Members of the household are considered deprived if the household has no electricity 
# The electricity indicator is hv206 taken as is
DataFinal[["electricity"]] <- DataFinal[["hv206"]]
describe(DataFinal$electricity)
table(DataFinal[["electricity"]], useNA = "always")
label(DataFinal[["electricity"]]) <- "Household has electricity"


########################################################################################################################
### Step 2.6 Sanitation 
########################################################################################################################
# Members of the household are considered deprived if the household's sanitation facility is not improved, according to 
# MDG guidelines, or it is improved but shared with other household. In cases of mismatch between the MDG guideline and 
# country report, we followed the country report. 
# Sanitation indicator:
#   toilet_mdg = 1  improved facility that is not shared
#   toilet_mdg = 0  unimproved facility, or any shared facility
#   toilet_mdg = NA type of facility missing (NA or code 99)
DataFinal$toilet <- DataFinal$hv205
describe(DataFinal$toilet)
table(DataFinal$toilet, useNA = "always")
describe(DataFinal$hv225)
table(DataFinal$hv225, useNA = "always")
DataFinal$shared_toilet <- DataFinal$hv225
    # 0=no;1=yes;NA=missing
# Improved facility and not reported as shared. `!(shared_toilet %in% 1)` is TRUE when the sharing answer is missing,
# so an improved facility with unknown sharing status is non-deprived; a plain `shared_toilet != 1` would evaluate
# to NA for those rows and silently leave the indicator missing.
DataFinal$toilet_mdg[DataFinal$toilet %in% c(11, 12, 13, 21, 22, 41, 44) &
                       !(DataFinal$shared_toilet %in% 1)] <- 1
# Unimproved facility types
DataFinal$toilet_mdg[DataFinal$toilet %in% c(14, 15, 23, 31, 42, 43, 96)] <- 0
# Any shared facility is deprived, whatever its type
DataFinal$toilet_mdg[DataFinal$shared_toilet %in% 1] <- 0
# Unknown facility type
DataFinal$toilet_mdg[is.na(DataFinal$toilet) | DataFinal$toilet %in% 99] <- NA
label(DataFinal$toilet_mdg) <- "Household has improved sanitation with MDG Standards"
table(DataFinal$toilet, DataFinal$toilet_mdg, useNA = "always")


########################################################################################################################
### Step 2.7 Drinking Water  
########################################################################################################################
# Members of the household are considered deprived if the household does not have access to safe drinking water according
# to MDG guidelines, or safe drinking water is more than a 30-minute walk from home roundtrip. In cases of mismatch 
# between the MDG guideline and country report, we followed the country report.
# Source columns: main drinking-water source, time to reach it, and the source of non-drinking water
DataFinal[["water"]] <- DataFinal[["hv201"]]
DataFinal[["timetowater"]] <- DataFinal[["hv204"]]
describe(DataFinal$water)
table(DataFinal[["water"]], useNA = "always")
DataFinal[["ndwater"]] <- DataFinal[["hv202"]]
    # Non-drinking water - no observation

# For drinking-water codes 71 and 72 the classification depends on the non-drinking source
water_is_71_72 <- DataFinal[["water"]] %in% c(71, 72)

# Non-deprived: source codes listed as improved, or 71/72 combined with an improved non-drinking source
DataFinal$water_mdg[DataFinal[["water"]] %in% c(11, 12, 13, 14, 21, 31, 41, 51) |
                      (water_is_71_72 & DataFinal[["ndwater"]] %in% c(11, 12, 13, 14, 21, 31))] <- 1
# Deprived: source codes listed as unimproved, or 71/72 combined with non-drinking source 32 or 96
DataFinal$water_mdg[DataFinal[["water"]] %in% c(32, 42, 43, 61, 62, 96) |
                      (water_is_71_72 & DataFinal[["ndwater"]] %in% c(32, 96))] <- 0
# Deprived if water is at more than 30 minutes' walk (roundtrip); applies to households not already coded 0 and
# ignores missing times and the special codes 996, 998 and 999
DataFinal$water_mdg[DataFinal$water_mdg %in% c(1, NA) &
                      DataFinal[["timetowater"]] >= 30 &
                      !(DataFinal[["timetowater"]] %in% c(NA, 996, 998, 999))] <- 0
# Unknown source (NA or code 99)
DataFinal$water_mdg[DataFinal[["water"]] %in% c(NA, 99)] <- NA
label(DataFinal$water_mdg) <- "Household has drinking water with MDG standards (considering distance)"
rm(water_is_71_72)
table(DataFinal[["water"]], DataFinal[["water_mdg"]], useNA = "always")


########################################################################################################################
### Step 2.8 Housing 
########################################################################################################################
# Floor (hv213): members of the household are deprived if the floor is dirt, sand or dung.
DataFinal$floor <- DataFinal$hv213
describe(DataFinal$floor)
table(DataFinal$floor, useNA = "always")
# floor_imp: 1 = not deprived (default), 0 = deprived, NA = floor material missing or coded 99.
# The flag is assembled in a local scope so that no helper objects are left in the workspace.
DataFinal$floor_imp <- local({
  material <- DataFinal$floor
  improved <- rep(1, nrow(DataFinal))
  # Deprived if "mud/earth", "sand", "dung", "other"
  improved[material == 11 | material == 12 | material == 96] <- 0
  improved[is.na(material) | material == 99] <- NA
  improved
})
label(DataFinal$floor_imp) <- "Household has floor that it is not earth/sand/dung"
table(DataFinal$floor, DataFinal$floor_imp, useNA = "always")

# Wall (hv214): members of the household are deprived if the walls are made of natural or rudimentary materials.
DataFinal$wall <- DataFinal$hv214
describe(DataFinal$wall)
table(DataFinal$wall, useNA = "always")
# wall_imp: 1 = not deprived (default), 0 = deprived (codes up to 26, or 96), NA = missing or coded 99.
DataFinal$wall_imp <- local({
  material <- DataFinal$wall
  improved <- rep(1, nrow(DataFinal))
  # Deprived if "no wall" "cane/palms/trunk" "mud/dirt" "grass/reeds/thatch" "pole/bamboo with mud" "stone with mud"
  # "plywood" "cardboard" "carton/plastic" "uncovered adobe" "canvas/tent" "unburnt bricks" "reused wood" "other"
  improved[material <= 26 | material == 96] <- 0
  improved[is.na(material) | material == 99] <- NA
  improved
})
label(DataFinal$wall_imp) <- "Household has wall that it is not of low quality materials"
table(DataFinal$wall, DataFinal$wall_imp, useNA = "always")


# Members of the household are considered deprived if the household has roof made of natural or rudimentary materials
# roof_imp: 1 = not deprived (default), 0 = deprived (codes up to 26, or 96), NA = roof material missing or coded 99.
DataFinal$roof <- DataFinal$hv215
describe(DataFinal$roof)
table(DataFinal$roof, useNA = "always")
DataFinal$roof_imp <- 1
DataFinal$roof_imp[DataFinal$roof <= 26 | DataFinal$roof == 96] <- 0
    # Deprived if "no roof" "thatch/palm leaf" "mud/earth/lump of earth""sod/grass" "plastic/polythene sheeting"
    # "rustic mat" "cardboard" "canvas/tent" "wood planks/reused wood" "unburnt bricks" "other"
# Missing or 99-coded roof material must be NA, as for floor and wall. The assignment was previously
# absent, so these households were silently kept as "not deprived".
DataFinal$roof_imp[is.na(DataFinal$roof) | DataFinal$roof == 99] <- NA
label(DataFinal$roof_imp) <- "Household has roof that it is not of low quality materials"
table(DataFinal$roof, DataFinal$roof_imp, useNA = "always")


#*Household is deprived in housing if the roof, floor OR walls uses low quality materials.
# housing_1: 1 = not deprived (default), 0 = at least one of floor/wall/roof is flagged 0,
#            NA = floor, wall and roof flags are all missing.
DataFinal$housing_1 <- local({
  floor_ok <- DataFinal$floor_imp
  wall_ok <- DataFinal$wall_imp
  roof_ok <- DataFinal$roof_imp
  adequate <- rep(1, nrow(DataFinal))
  adequate[floor_ok == 0 | wall_ok == 0 | roof_ok == 0] <- 0
  adequate[is.na(floor_ok) & is.na(wall_ok) & is.na(roof_ok)] <- NA
  adequate
})
label(DataFinal$housing_1) <- "Household has roof, floor & walls that it is not low quality material"
table(DataFinal$housing_1, useNA = "always")


########################################################################################################################
### Step 2.9 Cooking Fuel 
########################################################################################################################
# Members of the household are considered deprived if the household cooks with solid fuels: wood, charcoal, crop 
# residues or dung. "Indicators for Monitoring the Millennium Development Goals", p. 63 
#
# Input  : hv226 (copied to `cookingfuel`).
# Output : DataFinal$cooking_mdg = 1 (not deprived), 0 (deprived), NA (fuel missing or coded 99).
DataFinal$cookingfuel <- DataFinal$hv226  
describe(DataFinal$cookingfuel)
table(DataFinal$cookingfuel, useNA = "always")

# Codes up to 5, plus 95 and 96, are flagged 1; codes 6-11 are flagged 0; missing/99 is forced to NA last.
# NOTE(review): codes strictly between 11 and 95 (e.g. the 12/13/14 listed in the comment below) are not
# assigned by any statement here - confirm that such codes do not occur in this survey.
DataFinal$cooking_mdg[DataFinal$cookingfuel<=5 | DataFinal$cookingfuel==95 | DataFinal$cookingfuel==96] <- 1
DataFinal$cooking_mdg[(DataFinal$cookingfuel>5 & DataFinal$cookingfuel<=11)] <- 0
DataFinal$cooking_mdg[is.na(DataFinal$cookingfuel)| DataFinal$cookingfuel==99] <- NA
label(DataFinal$cooking_mdg) <- "Househod has cooking fuel according to MDG standards"
    # DHS report page 23 
    # The report does not consider "kerosene/paraffin" as a clean source but it also does not consider it solid fuel. I 
    # follow the indicator definition that states that a household is deprived if it cooks with dung, wood, charcoal or 
    # coal; therefore "kerosene/paraffin" is considered not deprived.
    # Non deprived if: 1 "electricity", 2 "lpg", 3 "natural gas", 4 "biogas", 5 "kerosene" , 95 "no food cooked in 
    # household", 96 "other", 12 "electricity from generator", 13 "electricity from other source", 14 "solar energy"
    # Deprived if: 6 "coal/lignite", 7 "charcoal", 8 "wood", 9 "straw/shrubs/grass" 10 "agricultural crop", 11 "animal dung"
# Cross-tabulation of raw fuel codes against the derived flag, as a visual check of the recode.
table(DataFinal$cookingfuel, DataFinal$cooking_mdg, useNA = "always")	


########################################################################################################################
### Step 2.10 Assets ownership 
########################################################################################################################
# Members of the household are considered deprived if the household does not own more than one of: radio, TV, telephone,
# bike, motorbike or refrigerator and does not own a car or truck.

# Check that for standard assets in living standards: "no"==0 and yes=="1"
describe(DataFinal$hv208)
describe(DataFinal$hv207)
describe(DataFinal$hv221)
describe(DataFinal$hv243a)
describe(DataFinal$hv209)
describe(DataFinal$hv212)
describe(DataFinal$hv210)
describe(DataFinal$hv211)
describe(DataFinal$hv243c)
describe(DataFinal$hv243e)

# Copy the DHS asset variables to readable names (bw_television is not available and is set to NA).
DataFinal$television <- DataFinal$hv208
DataFinal$bw_television <- NA
DataFinal$radio <- DataFinal$hv207
DataFinal$telephone <- DataFinal$hv221
DataFinal$mobiletelephone <- DataFinal$hv243a
DataFinal$refrigerator <- DataFinal$hv209
DataFinal$car <- DataFinal$hv212
DataFinal$bicycle <- DataFinal$hv210
DataFinal$motorbike <- DataFinal$hv211
DataFinal$computer <- DataFinal$hv243e
DataFinal$animal_cart <- DataFinal$hv243c


# Group telephone and mobiletelephone as a single variable: owning a mobile phone counts as owning a
# telephone, both when the landline answer is "no" (0) and when it is missing. The second statement
# previously assigned NA to cells that were already NA, i.e. it had no effect.
DataFinal$telephone[DataFinal$telephone == 0 & DataFinal$mobiletelephone == 1] <- 1
DataFinal$telephone[is.na(DataFinal$telephone) & DataFinal$mobiletelephone == 1] <- 1

# Members of the household are considered deprived in assets if the household does not own more than one of: radio,
# TV, telephone, bike, motorbike, refrigerator, computer or animal_cart and does not own a car or truck.
# NOTE(review): rowSums() is called without na.rm, so a missing value in any one of the eight assets makes
# the count NA for that household - confirm this is the intended treatment of partially missing assets.
DataFinal$n_small_assets2 <- rowSums(DataFinal[c("television", "radio", "telephone", "refrigerator", "bicycle",
                                                "motorbike", "computer", "animal_cart")])
label(DataFinal$n_small_assets2) <- "Household Number of Small Assets Owned"

# hh_assets2: 1 = owns a car or more than one small asset (not deprived), 0 = otherwise,
# NA = car and the small-asset count are both missing.
DataFinal$hh_assets2 <- ifelse(DataFinal$car == 1 | DataFinal$n_small_assets2 > 1, 1, 0)
DataFinal$hh_assets2[is.na(DataFinal$car) & is.na(DataFinal$n_small_assets2)] <- NA
label(DataFinal$hh_assets2) <- "Household Asset Ownership: HH has car or more than 1 small assets incl computer & animal cart"
    
    
    
########################################################################################################################
### Step 2.11 Rename and keep variables for MPI calculation 
########################################################################################################################
# Sampling design: inspect hv022 / hv021, then keep them as `strata` and `psu`.
str(DataFinal$hv022)
str(DataFinal$hv021)
DataFinal[c("strata", "psu")] <- unname(as.list(DataFinal[c("hv022", "hv021")]))


# Interview timing: inspect hv007 / hv006 / hv008, then keep them as year, month and date of interview.
str(DataFinal$hv007)
str(DataFinal$hv006)
str(DataFinal$hv008)
DataFinal[c("year_interview", "month_interview", "date_interview")] <-
  unname(as.list(DataFinal[c("hv007", "hv006", "hv008")]))
    
    
### Rename key global MPI indicators for estimation
# Each d_* flag is 1 when the matching indicator equals 0 (deprived) and 0 otherwise; a missing
# indicator yields a missing flag. Target and source names are paired by position.
DataFinal[c("d_cm", "d_nutr", "d_satt", "d_educ", "d_elct",
            "d_wtr", "d_sani", "d_hsg", "d_ckfl", "d_asst")] <-
  unname(lapply(
    DataFinal[c("hh_mortality_5y", "hh_nutrition_uw_st", "hh_child_atten", "hh_years_edu6", "electricity",
                "water_mdg", "toilet_mdg", "housing_1", "cooking_mdg", "hh_assets2")],
    function(indicator) ifelse(indicator == 0, 1, 0)
  ))

# Keep only the variables needed for the MPI estimation, in this order.
DataFinal <- DataFinal[c(
  # identifiers and survey metadata
  c("hh_id", "ind_id", "ccty", "ccnum", "cty", "survey", "year", "subsample"),
  # design variables and respondent characteristics
  c("strata", "psu", "weight", "area", "relationship", "sex", "age", "agec7", "agec4", "marital", "hhsize"),
  c("region", "year_interview", "month_interview", "date_interview"),
  # deprivation flags (1 = deprived)
  c("d_cm", "d_nutr", "d_satt", "d_educ", "d_elct", "d_wtr", "d_sani", "d_hsg", "d_ckfl", "d_asst"),
  # underlying indicators (1 = not deprived)
  c("hh_mortality_5y", "hh_nutrition_uw_st", "hh_child_atten", "hh_years_edu6", "electricity", "water_mdg"),
  c("toilet_mdg", "housing_1", "cooking_mdg", "hh_assets2")
)]
    
    
### Sort, compress and save data for estimation
# Indexing with order() returns a sorted copy; it has to be assigned back, otherwise the data stay
# unsorted and the whole data frame is printed to the console instead.
DataFinal <- DataFinal[order(DataFinal$ind_id), ]
write_dta(DataFinal, file.path(path_out, "ben_dhs18_pov.dta"))

  
    
########################################################################################################################
### MPI Calculation (TTD file)
########################################################################################################################
# SELECT COUNTRY POV FILE RUN ON LOOP FOR MORE COUNTRIES
# The prepared file is written to path_out by the preparation step, so it is read back from path_out
# (reading from path_in only worked while both folders were identical).
DataTTD <- read_stata(file.path(path_out, "ben_dhs18_pov.dta"))


########################################################################################################################
### Define Sample Weight and total population ***
########################################################################################################################
# DHS weights are divided by 1,000,000 before use.
DataTTD$sample_weight <- DataTTD$weight / 1000000
    # only DHS
    # change to weight if MICS

# Country identifiers attached to every row.
DataTTD$country <- "Benin"
DataTTD$countrycode <- "BEN"
    

########################################################################################################################
### List of the 10 indicators included in the MPI 
########################################################################################################################
# Copy the ten household indicators to the short names used in the estimation; target and source
# names are paired by position.
    # change countries with no child mortality 5 year to child mortality ever (cm_1)
DataTTD[c("edu_1", "atten_1", "cm_1", "nutri_1", "elec_1",
          "toilet_1", "water_1", "house_1", "fuel_1", "asset_1")] <-
  unname(as.list(DataTTD[c("hh_years_edu6", "hh_child_atten", "hh_mortality_5y", "hh_nutrition_uw_st", "electricity",
                           "toilet_mdg", "water_mdg", "housing_1", "cooking_mdg", "hh_assets2")]))
    
 
########################################################################################################################
### List of sample without missing values ***
########################################################################################################################
# sample_1 = 1 when all ten indicators are observed, 0 when at least one is missing.
DataTTD$sample_1 <- ifelse(!is.na(DataTTD$edu_1) & !is.na(DataTTD$atten_1) & !is.na(DataTTD$cm_1) &
                             !is.na(DataTTD$nutri_1) & !is.na(DataTTD$elec_1) & !is.na(DataTTD$toilet_1) &
                             !is.na(DataTTD$water_1) & !is.na(DataTTD$house_1) & !is.na(DataTTD$fuel_1) &
                             !is.na(DataTTD$asset_1), 1, 0)

DataTTD$sample_1[DataTTD$subsample == 0] <- NA
       # Note: If the anthropometric data was collected from a subsample of the total population that was sampled,
       # then the final analysis only includes the subsample population.
       # Percentage sample after dropping missing values

# Survey structure
DataTTD_weight <- svydesign(id = ~psu,
                            strata = ~strata,
                            weights = ~sample_weight,
                            nest = TRUE,
                            data = DataTTD)
# Rows outside the subsample carry sample_1 = NA (set just above). Without na.rm both shares would be
# NA as soon as one such row exists, so the NA rows are dropped from the two means.
DataTTD$per_sample_weighted_1 <- svymean(~sample_1, DataTTD_weight, na.rm = TRUE)
DataTTD$per_sample_1 <- mean(DataTTD$sample_1, na.rm = TRUE)
table(DataTTD$per_sample_weighted_1, useNA = "always")
table(DataTTD$per_sample_1, useNA = "always")


########################################################################################################################
### Define deprivation matrix 'g0' which takes values 1 if individual is deprived in the particular indicator according 
### to deprivation cutoff z as defined during step 2 ***
########################################################################################################################
# g01_<indicator> = 0 when the indicator equals 1 (not deprived) and 1 otherwise; missing stays missing.
# The loop runs on a local copy so no helper objects remain in the workspace.
DataTTD <- local({
  out <- DataTTD
  for (ind in c("edu_1", "atten_1", "cm_1", "nutri_1", "elec_1",
                "toilet_1", "water_1", "house_1", "fuel_1", "asset_1")) {
    out[[paste0("g01_", ind)]] <- ifelse(out[[ind]] == 1, 0, 1)
  }
  out
})

# Renew survey structure so that the design object contains the columns added to DataTTD so far,
# then restrict it to the rows with sample_1 == 1.
DataTTD_weight <- svydesign(id = ~psu, strata = ~strata, weights = ~sample_weight, nest = TRUE, data = DataTTD)
DataTTD_weight_subset <- subset(DataTTD_weight, sample_1 == 1)

### Raw Headcount Ratios
# raw1_<indicator>: survey-weighted mean of g01_<indicator> over the restricted design, times 100.
DataTTD <- local({
  out <- DataTTD
  for (ind in c("edu_1", "atten_1", "cm_1", "nutri_1", "elec_1",
                "toilet_1", "water_1", "house_1", "fuel_1", "asset_1")) {
    raw_col <- paste0("raw1_", ind)
    out[[raw_col]] <- svymean(reformulate(paste0("g01_", ind)), DataTTD_weight_subset) * 100
    label(out[[raw_col]]) <- paste0("Raw Headcount: Percentage of people who are deprived in ", ind)
  }
  out
})

        
########################################################################################################################
### Define vector 'w' of dimensional and indicator weight
########################################################################################################################
# If survey lacks one or more indicators, weights need to be adjusted within each dimension such that each dimension
# weighs 1/3 and the indicator weights add up to one (100%). CHECK COUNTRY FILE

## DIMENSIONS EDUCATION (edu, atten) AND HEALTH (cm, nutri): two indicators each, 1/6 per indicator
DataTTD[c("w1_edu_1", "w1_atten_1", "w1_cm_1", "w1_nutri_1")] <- 1 / 6

## DIMENSION LIVING STANDARD: six indicators, 1/18 per indicator
DataTTD[c("w1_elec_1", "w1_toilet_1", "w1_water_1", "w1_house_1", "w1_fuel_1", "w1_asset_1")] <- 1 / 18

 
########################################################################################################################
### Generate the weighted deprivation matrix 'w' * 'g0'
########################################################################################################################  
# w1_g0_<indicator> = indicator weight times deprivation flag for rows with sample_1 == 1, NA otherwise.
    # The estimation is based only on observations that have non-missing values for all variables in varlist_pov
DataTTD <- local({
  out <- DataTTD
  in_sample <- out$sample_1 == 1
  for (ind in c("edu_1", "atten_1", "cm_1", "nutri_1", "elec_1",
                "toilet_1", "water_1", "house_1", "fuel_1", "asset_1")) {
    out[[paste0("w1_g0_", ind)]] <- ifelse(in_sample, out[[paste0("w1_", ind)]] * out[[paste0("g01_", ind)]], NA)
  }
  out
})


########################################################################################################################
### Generate the vector of individual weighted deprivation count 'c'
########################################################################################################################
# c_vector_1: row sum of the ten weighted deprivations for rows with sample_1 == 1, NA otherwise.
DataTTD$c_vector_1 <- local({
  weighted_cols <- paste0("w1_g0_", c("edu_1", "atten_1", "cm_1", "nutri_1", "elec_1",
                                      "toilet_1", "water_1", "house_1", "fuel_1", "asset_1"))
  ifelse(DataTTD$sample_1 == 1, rowSums(DataTTD[weighted_cols]), NA)
})


########################################################################################################################
### Identification step according to poverty cutoff k (20 33 50) 
########################################################################################################################
# multidimensionally_poor_1_<k>: 1 when the deprivation score is at least k/100, 0 otherwise;
# NA when the score is missing or the row is not in the estimation sample.
DataTTD <- local({
  out <- DataTTD
  not_usable <- is.na(out$c_vector_1) | out$sample_1 != 1
  for (k in c(20, 33, 50)) {
    poor_col <- paste0("multidimensionally_poor_1_", k)
    out[[poor_col]] <- ifelse(out$c_vector_1 >= k / 100, 1, 0)
    out[[poor_col]][not_usable] <- NA
  }
  out
})


########################################################################################################################
### Generate the censored vector of individual weighted deprivation count 'c(k)'
########################################################################################################################
# c_censured_vector_1_<k>: the deprivation score, replaced by zero for people who are not poor at cutoff k.
DataTTD <- local({
  out <- DataTTD
  for (k in c(20, 33, 50)) {
    is_not_poor <- out[[paste0("multidimensionally_poor_1_", k)]] == 0
    # Provide a score of zero if a person is not poor
    out[[paste0("c_censured_vector_1_", k)]] <- ifelse(is_not_poor, 0, out$c_vector_1)
  }
  out
})


########################################################################################################################
### Define censored deprivation matrix 'g0(k)'  with multidimensionally_poor_1_33
########################################################################################################################
# g01_33_<indicator>: deprivation flag censored at k = 33 -> 0 for people who are not poor,
# NA for rows outside the estimation sample, and the raw g01_<indicator> flag otherwise.
DataTTD <- local({
  out <- DataTTD
  poor_33 <- out$multidimensionally_poor_1_33
  for (ind in c("edu_1", "atten_1", "cm_1", "nutri_1", "elec_1",
                "toilet_1", "water_1", "house_1", "fuel_1", "asset_1")) {
    out[[paste0("g01_33_", ind)]] <- ifelse(
      poor_33 == 0,
      0,
      ifelse(poor_33 != 0 & out$sample_1 != 1, NA, out[[paste0("g01_", ind)]])
    )
  }
  out
})


########################################################################################################################
### Generates Multidimensional Poverty Index (MPI), Headcount (H) and Intensity of Poverty (A) 
########################################################################################################################
# Renew survey structure: rebuild the design from the current DataTTD, then derive two restricted designs:
#   *_subset  : rows with sample_1 == 1
#   *_subset2 : rows with sample_1 == 1 that are poor at k = 33
DataTTD_weight <- svydesign(id = ~psu, strata = ~strata, weights = ~sample_weight, nest = TRUE, data = DataTTD)
DataTTD_weight_subset <- subset(DataTTD_weight, sample_1 == 1)
DataTTD_weight_subset2 <- subset(DataTTD_weight, sample_1 == 1 & multidimensionally_poor_1_33 == 1)

DataTTD <- local({
  out <- DataTTD

  ### Multidimensional Poverty Index (MPI): weighted mean of the censored score at each cutoff
  for (k in c(20, 33, 50)) {
    mpi_col <- paste0("MPI_1_", k)
    out[[mpi_col]] <- svymean(reformulate(paste0("c_censured_vector_1_", k)), DataTTD_weight_subset)
    label(out[[mpi_col]]) <- paste0("MPI with k=", k)
  }

  # Headline MPI uses the k = 33 cutoff
  out[["MPI_1"]] <- svymean(~c_censured_vector_1_33, DataTTD_weight_subset)
  label(out[["MPI_1"]]) <- "1 Multidimensional Poverty Index (MPI = H*A): Range 0 to 1"

  ### Headcount (H): weighted share of poor people (k = 33), in percent
  out[["H_1"]] <- svymean(~multidimensionally_poor_1_33, DataTTD_weight_subset) * 100
  label(out[["H_1"]]) <- "1 Headcount ratio: % Population in multidimensional poverty (H)"

  ### Intensity of Poverty (A): weighted mean score among the poor only, in percent
  out[["A_1"]] <- svymean(~c_censured_vector_1_33, DataTTD_weight_subset2) * 100
  label(out[["A_1"]]) <- "1 Intensity of deprivation among the poor (A): Average % of weighted deprivations"

  out
})

### Population vulnerable to poverty (who experience 20-32.9% intensity of deprivations)
# temp = 1 when the score lies in [0.2, 0.3332), NA for rows outside the estimation sample, 0 otherwise.
DataTTD$temp <- local({
  score <- DataTTD$c_vector_1
  in_band <- score >= 0.2 & score < 0.3332
  ifelse(in_band, 1, ifelse(!in_band & DataTTD$sample_1 != 1, NA, 0))
})
# The design is rebuilt so that it contains the new `temp` column.
DataTTD_weight <- svydesign(id = ~psu, strata = ~strata, weights = ~sample_weight, nest = TRUE, data = DataTTD)
DataTTD_weight_subset <- subset(DataTTD_weight, sample_1 == 1)
DataTTD$vulnerable_1 <- svymean(~temp, DataTTD_weight_subset) * 100

### Population in severe poverty (with intensity 50% or higher)
# temp2 = 1 when the score exceeds 0.49, NA for rows outside the estimation sample, 0 otherwise.
DataTTD$temp2 <- local({
  score <- DataTTD$c_vector_1
  ifelse(score > 0.49, 1, ifelse(score <= 0.49 & DataTTD$sample_1 != 1, NA, 0))
})
# The design is rebuilt again so that it contains the new `temp2` column.
DataTTD_weight <- svydesign(id = ~psu, strata = ~strata, weights = ~sample_weight, nest = TRUE, data = DataTTD)
DataTTD_weight_subset <- subset(DataTTD_weight, sample_1 == 1)
DataTTD$severe_1 <- svymean(~temp2, DataTTD_weight_subset) * 100
label(DataTTD$severe_1) <- "1 % Population in severe poverty (with intensity 50% or higher)"

### Censored Headcount
# cen1_<indicator>: survey-weighted mean of the censored flag g01_33_<indicator> over the rows with
# sample_1 == 1 (the current DataTTD_weight_subset design), times 100.
DataTTD <- local({
  out <- DataTTD
  for (ind in c("edu_1", "atten_1", "cm_1", "nutri_1", "elec_1",
                "toilet_1", "water_1", "house_1", "fuel_1", "asset_1")) {
    cen_col <- paste0("cen1_", ind)
    out[[cen_col]] <- svymean(reformulate(paste0("g01_33_", ind)), DataTTD_weight_subset) * 100
    label(out[[cen_col]]) <- paste0("Censored Headcount: Percentage of people who are poor and deprived in ", ind, ")")
  }
  out
})
   
### Dimensional Contribution
# cont1_<indicator> = weight * censored headcount / MPI_1 for rows with sample_1 == 1, NA otherwise.
DataTTD <- local({
  out <- DataTTD
  in_sample <- out$sample_1 == 1
  for (ind in c("edu_1", "atten_1", "cm_1", "nutri_1", "elec_1",
                "toilet_1", "water_1", "house_1", "fuel_1", "asset_1")) {
    out[[paste0("cont1_", ind)]] <- ifelse(
      in_sample,
      out[[paste0("w1_", ind)]] * out[[paste0("cen1_", ind)]] / out$MPI_1,
      NA
    )
  }
  out
})

### Prepare results to export
# Drop the preparation data and the survey design objects, which are no longer needed.
rm(list = c("DataFinal", "DataTTD_weight", "DataTTD_weight_subset", "DataTTD_weight_subset2"))

# Keep the summary columns, drop rows without a nutrition contribution, and collapse duplicate rows.
DataOutput <- DataTTD[c("MPI_1", "H_1", "A_1", "vulnerable_1", "severe_1",
                        "cont1_nutri_1", "cont1_cm_1", "cont1_edu_1", "cont1_atten_1",
                        "cont1_fuel_1", "cont1_toilet_1", "cont1_water_1", "cont1_elec_1",
                        "cont1_house_1", "cont1_asset_1",
                        "per_sample_1", "per_sample_weighted_1")]
DataOutput <- unique(subset(DataOutput, !is.na(cont1_nutri_1)))

# NOTE(review): the results are written to path_in rather than path_out - confirm this is intended.
write.csv(DataOutput, file.path(path_in, "DataOutput_Benin_with_correction.csv"), row.names = TRUE)